aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/internal
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/internal')
-rw-r--r--vendor/golang.org/x/text/internal/colltab/collelem.go371
-rw-r--r--vendor/golang.org/x/text/internal/colltab/colltab.go105
-rw-r--r--vendor/golang.org/x/text/internal/colltab/contract.go145
-rw-r--r--vendor/golang.org/x/text/internal/colltab/iter.go178
-rw-r--r--vendor/golang.org/x/text/internal/colltab/numeric.go236
-rw-r--r--vendor/golang.org/x/text/internal/colltab/table.go275
-rw-r--r--vendor/golang.org/x/text/internal/colltab/trie.go159
-rw-r--r--vendor/golang.org/x/text/internal/colltab/weighter.go31
-rw-r--r--vendor/golang.org/x/text/internal/tag/tag.go100
-rw-r--r--vendor/golang.org/x/text/internal/ucd/ucd.go115
10 files changed, 1655 insertions, 60 deletions
diff --git a/vendor/golang.org/x/text/internal/colltab/collelem.go b/vendor/golang.org/x/text/internal/colltab/collelem.go
new file mode 100644
index 0000000..2855589
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/colltab/collelem.go
@@ -0,0 +1,371 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package colltab
+
+import (
+ "fmt"
+ "unicode"
+)
+
+// Level identifies the collation comparison level.
+// The primary level corresponds to the basic sorting of text.
+// The secondary level corresponds to accents and related linguistic elements.
+// The tertiary level corresponds to casing and related concepts.
+// The quaternary level is derived from the other levels by the
+// various algorithms for handling variable elements.
+type Level int
+
+const (
+ Primary Level = iota // basic sorting of text
+ Secondary // accents and related linguistic elements
+ Tertiary // casing and related concepts
+ Quaternary // derived from the other levels when handling variable elements
+ Identity // NOTE(review): Elem.Weight has no case for this level and returns 0
+
+ NumLevels // count of the levels declared above
+)
+
+const (
+ defaultSecondary = 0x20 // secondary weight reported when an element stores none
+ defaultTertiary = 0x2 // tertiary weight reported for type-1 elements
+ maxTertiary = 0x1F // 5 bits; used as the tertiary mask in updateTertiary
+ MaxQuaternary = 0x1FFFFF // 21 bits.
+)
+
+// Elem is a representation of a collation element. This API provides ways to encode
+// and decode Elems. Implementations of collation tables may use values greater
+// or equal to PrivateUse for their own purposes. However, these should never be
+// returned by AppendNext.
+type Elem uint32
+
+const (
+ maxCE Elem = 0xAFFFFFFF // largest value ctype reports as ceNormal
+ PrivateUse = minContract // first value reserved for table implementations
+ minContract = 0xC0000000 // contraction indexes span minContract..maxContract
+ maxContract = 0xDFFFFFFF
+ minExpand = 0xE0000000 // expansion indexes span minExpand..maxExpand
+ maxExpand = 0xEFFFFFFF
+ minDecomp = 0xF0000000 // decomposition markers start here
+)
+
+// ceType is the coarse category of an Elem, as reported by Elem.ctype.
+type ceType int
+
+const (
+ ceNormal ceType = iota // ceNormal includes implicits (ce == 0)
+ ceContractionIndex // rune can be a start of a contraction
+ ceExpansionIndex // rune expands into a sequence of collation elements
+ ceDecompose // rune expands using NFKC decomposition
+)
+
+// ctype classifies ce by the numeric range it falls in: values up to maxCE
+// are normal collation elements, values up to maxContract are contraction
+// indexes, values up to maxExpand are expansion indexes, and everything
+// above that is a decomposition marker.
+func (ce Elem) ctype() ceType {
+	switch {
+	case ce <= maxCE:
+		return ceNormal
+	case ce <= maxContract:
+		return ceContractionIndex
+	case ce <= maxExpand:
+		return ceExpansionIndex
+	default:
+		return ceDecompose
+	}
+}
+
+// For normal collation elements, we assume that a collation element either has
+// a primary or non-default secondary value, not both.
+// Collation elements with a primary value are of the form
+// 01pppppp pppppppp ppppppp0 ssssssss
+// - p* is primary collation value
+// - s* is the secondary collation value
+// 00pppppp pppppppp ppppppps sssttttt, where
+// - p* is primary collation value
+// - s* offset of secondary from default value.
+// - t* is the tertiary collation value
+// 100ttttt cccccccc pppppppp pppppppp
+// - t* is the tertiary collation value
+// - c* is the canonical combining class
+// - p* is the primary collation value
+// Collation elements with a secondary value are of the form
+// 1010cccc ccccssss ssssssss tttttttt, where
+// - c* is the canonical combining class
+// - s* is the secondary collation value
+// - t* is the tertiary collation value
+// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
+// - q* quaternary value
+const (
+ ceTypeMask = 0xC0000000 // top two bits: type 1, type 2, types 3/4 or quaternary
+ ceTypeMaskExt = 0xE0000000 // top three bits: also separates type 3 from type 4
+ ceIgnoreMask = 0xF00FFFFF // omits bits 20-27 (the type-4 CCC field) when testing for Ignore
+ ceType1 = 0x40000000 // 01...: primary plus 8-bit secondary
+ ceType2 = 0x00000000 // 00...: primary plus compact secondary and tertiary
+ ceType3or4 = 0x80000000 // 10...: type 3 (100...) or type 4 (101...)
+ ceType4 = 0xA0000000 // 101...: CCC, secondary and tertiary, no primary
+ ceTypeQ = 0xC0000000 // 11...: quaternary value
+ Ignore = ceType4 // type-4 element with every weight field zero
+ firstNonPrimary = 0x80000000 // Primary stops decoding the 21-bit primary field from here on
+ lastSpecialPrimary = 0xA0000000 // above this value Primary returns 0
+ secondaryMask = 0x80000000
+ hasTertiaryMask = 0x40000000 // bit 30; Tertiary reads a stored tertiary only when it is clear
+ primaryValueMask = 0x3FFFFE00 // bits 9-29: primary field of type-1 and type-2 elements
+ maxPrimaryBits = 21
+ compactPrimaryBits = 16 // width of the primary field in type-3 elements
+ maxSecondaryBits = 12
+ maxTertiaryBits = 8
+ maxCCCBits = 8
+ maxSecondaryCompactBits = 8 // width of the secondary field in type-1 elements
+ maxSecondaryDiffBits = 4 // width of the secondary offset in type-2 elements
+ maxTertiaryCompactBits = 5 // width of the tertiary field in type-2 and type-3 elements
+ primaryShift = 9 // position of the primary (and quaternary) field
+ compactSecondaryShift = 5 // position of the secondary offset in type-2 elements
+ minCompactSecondary = defaultSecondary - 4 // secondary encoded by a type-2 offset of 0
+)
+
+// makeImplicitCE builds a type-1 Elem that carries primary together with the
+// default secondary weight.
+func makeImplicitCE(primary int) Elem {
+	shifted := Elem(primary << primaryShift)
+	return ceType1 | shifted | defaultSecondary
+}
+
+// MakeElem returns an Elem for the given values. It will return an error
+// if the given combination of values is invalid.
+//
+// The layout is chosen from the arguments: a non-zero primary with a non-zero
+// ccc produces type 3, a non-zero primary with the default tertiary produces
+// type 1, any other non-zero primary produces type 2, and a zero primary
+// produces type 4.
+func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
+ // Reject weights that do not fit the widest field of their level.
+ if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
+ return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
+ }
+ if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
+ return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
+ }
+ if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
+ return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
+ }
+ ce := Elem(0)
+ if primary != 0 {
+ if ccc != 0 {
+ // Type 3: 100ttttt cccccccc pppppppp pppppppp.
+ if primary >= 1<<compactPrimaryBits {
+ return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
+ }
+ if secondary != defaultSecondary {
+ return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
+ }
+ ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
+ ce |= Elem(ccc) << compactPrimaryBits
+ ce |= Elem(primary)
+ ce |= ceType3or4
+ } else if tertiary == defaultTertiary {
+ // Type 1: the tertiary weight is implied, so only primary and secondary are stored.
+ if secondary >= 1<<maxSecondaryCompactBits {
+ return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
+ }
+ ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
+ ce |= ceType1
+ } else {
+ // Type 2: the secondary is stored as an offset; the bias added here (4) equals defaultSecondary - minCompactSecondary.
+ d := secondary - defaultSecondary + maxSecondaryDiffBits
+ if d >= 1<<maxSecondaryDiffBits || d < 0 {
+ return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
+ }
+ if tertiary >= 1<<maxTertiaryCompactBits {
+ return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
+ }
+ // No type bits are OR-ed in because ceType2 is 0.
+ ce = Elem(primary<<maxSecondaryDiffBits + d)
+ ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
+ }
+ } else {
+ // Type 4: 1010cccc ccccssss ssssssss tttttttt.
+ ce = Elem(secondary<<maxTertiaryBits + tertiary)
+ ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
+ ce |= ceType4
+ }
+ return ce, nil
+}
+
+// MakeQuaternary returns a quaternary-type Elem holding v. The value is
+// stored at the same bit position as a primary weight.
+func MakeQuaternary(v int) Elem {
+	payload := Elem(v << primaryShift)
+	return ceTypeQ | payload
+}
+
+// Mask sets weights for any level smaller than l to 0.
+// The resulting Elem can be used to test for equality with
+// other Elems to which the same mask has been applied.
+//
+// NOTE(review): the body below ignores both ce and l and always returns 0,
+// so the behavior described above is not implemented here.
+func (ce Elem) Mask(l Level) uint32 {
+ return 0
+}
+
+// CCC returns the canonical combining class stored in ce, or 0 when the top
+// bit of ce is clear (the type-1 and type-2 layouts carry no class).
+func (ce Elem) CCC() uint8 {
+	if ce&ceType3or4 == 0 {
+		return 0
+	}
+	if ce&ceType4 == ceType3or4 {
+		// Bit 29 clear (type-3 layout): the class sits in bits 16-23.
+		return uint8(ce >> 16)
+	}
+	// Bit 29 set (type-4 layout): the class sits in bits 20-27.
+	return uint8(ce >> 20)
+}
+
+// Primary returns the primary collation weight for ce. Elements below
+// firstNonPrimary store it in the 21-bit primary field; elements up to
+// lastSpecialPrimary store it in the low 16 bits; anything larger has none.
+func (ce Elem) Primary() int {
+	switch {
+	case ce < firstNonPrimary:
+		return int(ce&primaryValueMask) >> primaryShift
+	case ce > lastSpecialPrimary:
+		return 0
+	default:
+		return int(uint16(ce))
+	}
+}
+
+// Secondary returns the secondary collation weight for ce, decoded according
+// to the layout selected by the top two bits.
+func (ce Elem) Secondary() int {
+	switch kind := ce & ceTypeMask; kind {
+	case ceTypeQ:
+		return 0
+	case ceType1:
+		// Stored in full in the low byte.
+		return int(uint8(ce))
+	case ceType2:
+		// Stored as a 4-bit offset from minCompactSecondary.
+		offset := int((ce >> compactSecondaryShift) & 0xF)
+		return minCompactSecondary + offset
+	case ceType3or4:
+		if ce >= ceType4 {
+			return int(ce>>8) & 0xFFF
+		}
+		// Type 3 has no secondary field.
+		return defaultSecondary
+	}
+	panic("should not reach here")
+}
+
+// Tertiary returns the tertiary collation weight for ce.
+func (ce Elem) Tertiary() uint8 {
+	switch ce & ceTypeMask {
+	case ceType1:
+		// Type 1 never stores a tertiary weight.
+		return defaultTertiary
+	case ceTypeQ:
+		// ce is a quaternary value.
+		return 0
+	case ceType2:
+		return uint8(ce & 0x1F)
+	default:
+		if ce&ceType4 == ceType4 {
+			// Type 4 keeps the tertiary in the low byte.
+			return uint8(ce)
+		}
+		// Type 3 keeps the tertiary in bits 24-28.
+		return uint8(ce>>24) & 0x1F
+	}
+}
+
+// updateTertiary returns a copy of ce whose tertiary weight is replaced by t.
+// A type-1 element has no tertiary field, so it is re-encoded in the type-2
+// layout first.
+func (ce Elem) updateTertiary(t uint8) Elem {
+ if ce&ceTypeMask == ceType1 {
+ // convert to type 2: keep the primary and store the secondary as an offset from minCompactSecondary
+ nce := ce & primaryValueMask
+ nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
+ ce = nce
+ } else if ce&ceTypeMaskExt == ceType3or4 {
+ // type 3: the tertiary lives in bits 24-28
+ ce &= ^Elem(maxTertiary << 24)
+ return ce | (Elem(t) << 24)
+ } else {
+ // type 2 or 4
+ // NOTE(review): only the low 5 bits are cleared here, although type 4 has an 8-bit tertiary field.
+ ce &= ^Elem(maxTertiary)
+ }
+ return ce | Elem(t)
+}
+
+// Quaternary returns the explicitly stored quaternary value for quaternary
+// elements, 0 when ce matches Ignore, and MaxQuaternary for everything else.
+// Quaternary values are used only for shifted variants.
+func (ce Elem) Quaternary() int {
+	if ce&ceTypeMask == ceTypeQ {
+		return int(ce&primaryValueMask) >> primaryShift
+	}
+	if ce&ceIgnoreMask == Ignore {
+		return 0
+	}
+	return MaxQuaternary
+}
+
+// Weight returns the collation weight of ce at level l. Levels other than
+// Primary, Secondary, Tertiary and Quaternary yield 0 (ignore).
+func (ce Elem) Weight(l Level) int {
+	if l == Primary {
+		return ce.Primary()
+	}
+	if l == Secondary {
+		return ce.Secondary()
+	}
+	if l == Tertiary {
+		return int(ce.Tertiary())
+	}
+	if l == Quaternary {
+		return ce.Quaternary()
+	}
+	return 0
+}
+
+// For contractions, collation elements are of the form
+// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
+// - n* is the size of the first node in the contraction trie.
+// - i* is the index of the first node in the contraction trie.
+// - b* is the offset into the contraction collation element table.
+// See contract.go for details on the contraction trie.
+const (
+ maxNBits = 4 // width of the n field (size of the first trie node)
+ maxTrieIndexBits = 12 // width of the i field (index of the first trie node)
+ maxContractOffsetBits = 13 // width of the b field (offset into the contraction element table)
+)
+
+func splitContractIndex(ce Elem) (index, n, offset int) {
+ n = int(ce & (1<<maxNBits - 1))
+ ce >>= maxNBits
+ index = int(ce & (1<<maxTrieIndexBits - 1))
+ ce >>= maxTrieIndexBits
+ offset = int(ce & (1<<maxContractOffsetBits - 1))
+ return
+}
+
+// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
+// where b* is the index into the expansion sequence table.
+const maxExpandIndexBits = 16 // width of the b field described above
+
+// splitExpandIndex returns the low 16 bits of ce, which hold the index into
+// the expansion sequence table.
+func splitExpandIndex(ce Elem) (index int) {
+	index = int(ce & 0xFFFF)
+	return index
+}
+
+// Some runes can be expanded using NFKD decomposition. Instead of storing the full
+// sequence of collation elements, we decompose the rune and lookup the collation
+// elements for each rune in the decomposition and modify the tertiary weights.
+// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
+// - v* is the replacement tertiary weight for the first rune,
+// - w* is the replacement tertiary weight for the second rune,
+// Tertiary weights of subsequent runes should be replaced with maxTertiary.
+// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
+func splitDecompose(ce Elem) (t1, t2 uint8) {
+	t1 = uint8(ce)      // low byte: weight for the first rune
+	t2 = uint8(ce >> 8) // next byte: weight for the second rune
+	return t1, t2
+}
+
+const (
+ // These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
+ minUnified rune = 0x4E00 // minUnified..maxUnified: the range implicitPrimary treats as common CJK
+ maxUnified = 0x9FFF
+ minCompatibility = 0xF900 // minCompatibility..maxCompatibility: also mapped with commonUnifiedOffset
+ maxCompatibility = 0xFAFF
+ minRare = 0x3400 // NOTE(review): minRare/maxRare are not referenced by implicitPrimary
+ maxRare = 0x4DBF
+)
+// Offsets that implicitPrimary adds to a rune to obtain its primary weight.
+const (
+ commonUnifiedOffset = 0x10000
+ rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
+ otherOffset = 0x50000 // largest rune in rare is U+2FA1D
+ illegalOffset = otherOffset + int(unicode.MaxRune)
+ maxPrimary = illegalOffset + 1
+)
+
+// implicitPrimary returns the primary weight for a rune that has no entry in
+// the collation table. The approach differs from the one specified in
+// http://unicode.org/reports/tr10/#Implicit_Weights,
+// but preserves the resulting relative ordering of the runes.
+func implicitPrimary(r rune) int {
+	offset := otherOffset
+	if unicode.Is(unicode.Ideographic, r) {
+		switch {
+		case r >= minUnified && r <= maxUnified:
+			// The most common case for CJK.
+			offset = commonUnifiedOffset
+		case r >= minCompatibility && r <= maxCompatibility:
+			// This will typically not hit. The DUCET explicitly specifies
+			// mappings for all characters that do not decompose.
+			offset = commonUnifiedOffset
+		default:
+			offset = rareUnifiedOffset
+		}
+	}
+	return int(r) + offset
+}
diff --git a/vendor/golang.org/x/text/internal/colltab/colltab.go b/vendor/golang.org/x/text/internal/colltab/colltab.go
new file mode 100644
index 0000000..02f2247
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/colltab/colltab.go
@@ -0,0 +1,105 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package colltab contains functionality related to collation tables.
+// It is only to be used by the collate and search packages.
+package colltab // import "golang.org/x/text/internal/colltab"
+
+import (
+ "sort"
+
+ "golang.org/x/text/language"
+)
+
+// MatchLang finds the index of t in tags, using a matching algorithm used for
+// collation and search. tags[0] must be language.Und, the remaining tags should
+// be sorted alphabetically.
+//
+// Language matching for collation and search is different from the matching
+// defined by language.Matcher: the (inferred) base language must be an exact
+// match for the relevant fields. For example, "gsw" should not match "de".
+// Also the parent relation is different, as a parent may have a different
+// script. So usually the parent of zh-Hant is und, whereas for MatchLang it is
+// zh.
+func MatchLang(t language.Tag, tags []language.Tag) int {
+ // Canonicalize the values, including collapsing macro languages.
+ t, _ = language.All.Canonicalize(t)
+
+ base, conf := t.Base()
+ // Estimate the base language, but only use high-confidence values.
+ if conf < language.High {
+ // The root locale supports "search" and "standard". We assume that any
+ // implementation will only use one of both.
+ return 0
+ }
+
+ // Maximize base and script and normalize the tag.
+ if _, s, r := t.Raw(); (r != language.Region{}) {
+ p, _ := language.Raw.Compose(base, s, r)
+ // Taking the parent forces the script to be maximized.
+ p = p.Parent()
+ // Add back region and extensions.
+ t, _ = language.Raw.Compose(p, r, t.Extensions())
+ } else {
+ // Set the maximized base language.
+ t, _ = language.Raw.Compose(base, s, t.Extensions())
+ }
+
+ // Find start index of the language tag.
+ // The search skips tags[0]; it yields the first later tag whose base sorts at or after base.
+ start := 1 + sort.Search(len(tags)-1, func(i int) bool {
+ b, _, _ := tags[i+1].Raw()
+ return base.String() <= b.String()
+ })
+ if start < len(tags) {
+ // No tag with this base language exists: fall back to index 0.
+ if b, _, _ := tags[start].Raw(); b != base {
+ return 0
+ }
+ }
+
+ // Besides the base language, script and region, only the collation type and
+ // the custom variant defined in the 'u' extension are used to distinguish a
+ // locale.
+ // Strip all variants and extensions and add back the custom variant.
+ tdef, _ := language.Raw.Compose(t.Raw())
+ tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va"))
+
+ // First search for a specialized collation type, if present.
+ try := []language.Tag{tdef}
+ if co := t.TypeForKey("co"); co != "" {
+ tco, _ := tdef.SetTypeForKey("co", co)
+ try = []language.Tag{tco, tdef}
+ }
+
+ // For each candidate, walk up the structural parent chain and compare
+ // against the run of tags that share the base language.
+ for _, tx := range try {
+ for ; tx != language.Und; tx = parent(tx) {
+ for i, t := range tags[start:] {
+ if b, _, _ := t.Raw(); b != base {
+ break
+ }
+ if tx == t {
+ return start + i
+ }
+ }
+ }
+ }
+ return 0
+}
+
+// parent computes the structural parent of t: it drops, in order of
+// preference, the "va" type, the region, the script, or the base language.
+// Inheritance may therefore change script, so unlike the CLDR parent,
+// parent(zh-Hant) == zh.
+func parent(t language.Tag) language.Tag {
+	if t.TypeForKey("va") != "" {
+		t, _ = t.SetTypeForKey("va", "")
+		return t
+	}
+	b, s, r := t.Raw()
+	switch {
+	case r != (language.Region{}):
+		up, _ := language.Raw.Compose(b, s, t.Extensions())
+		return up
+	case s != (language.Script{}):
+		up, _ := language.Raw.Compose(b, t.Extensions())
+		return up
+	case b != (language.Base{}):
+		up, _ := language.Raw.Compose(t.Extensions())
+		return up
+	}
+	return language.Und
+}
diff --git a/vendor/golang.org/x/text/internal/colltab/contract.go b/vendor/golang.org/x/text/internal/colltab/contract.go
new file mode 100644
index 0000000..25649d4
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/colltab/contract.go
@@ -0,0 +1,145 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package colltab
+
+import "unicode/utf8"
+
+// For a description of ContractTrieSet, see text/collate/build/contract.go.
+
+// ContractTrieSet is a flat sequence of trie entries. As read by scan: L is
+// the byte to match (or the low end of a byte range), H is the high end of
+// that range for final entries and otherwise contributes to the offset of the
+// next state block, N is the number of entries in the next state block
+// (final when 0), and I is the result index (noIndex when there is none).
+type ContractTrieSet []struct{ L, H, N, I uint8 }
+
+// ctScanner is used to match a trie to an input sequence.
+// A contraction may match a non-contiguous sequence of bytes in an input string.
+// For example, if there is a contraction for <a, combining_ring>, it should match
+// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does
+// not block combining_ring.
+// ctScanner does not automatically skip over non-blocking non-starters, but rather
+// retains the state of the last match and leaves it up to the user to continue
+// the match at the appropriate points.
+type ctScanner struct {
+ states ContractTrieSet // entries of the state block to resume from
+ s []byte // input being matched
+ n int // number of entries in the current state block
+ index int // result index of the last match
+ pindex int // input position just past the last match
+ done bool // set once a final entry has matched
+}
+
+// ctScannerString is the string-input counterpart of ctScanner.
+type ctScannerString struct {
+ states ContractTrieSet // entries of the state block to resume from
+ s string // input being matched
+ n int // number of entries in the current state block
+ index int // result index of the last match
+ pindex int // input position just past the last match
+ done bool // set once a final entry has matched
+}
+
+func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
+ return ctScanner{s: b, states: t[index:], n: n}
+}
+
+func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
+ return ctScannerString{s: str, states: t[index:], n: n}
+}
+
+// result reports the outcome of the scan so far: i is the result index of
+// the last match and p the input position just past it. Both are 0 when
+// nothing has matched.
+func (s *ctScanner) result() (i, p int) {
+	i, p = s.index, s.pindex
+	return i, p
+}
+
+// result reports the result index of the last match and the input position
+// just past it; both are 0 when nothing has matched.
+func (s *ctScannerString) result() (i, p int) {
+	i, p = s.index, s.pindex
+	return i, p
+}
+
+const (
+ final = 0 // value of N marking an entry with no following state block
+ noIndex = 0xFF // value of I marking an entry that carries no result
+)
+
+// scan matches the longest suffix at the current location in the input
+// and returns the number of bytes consumed.
+func (s *ctScanner) scan(p int) int {
+ pr := p // the p at the rune start
+ str := s.s
+ states, n := s.states, s.n
+ // i walks the entries of the current state block; it is reset to 0 whenever we descend.
+ for i := 0; i < n && p < len(str); {
+ e := states[i]
+ c := str[p]
+ // TODO: a significant number of contractions are of a form that
+ // cannot match discontiguous UTF-8 in a normalized string. We could let
+ // a negative value of e.n mean that we can set s.done = true and avoid
+ // the need for additional matches.
+ if c >= e.L {
+ if e.L == c {
+ // Exact byte match: consume it and record the result if the entry has one.
+ p++
+ if e.I != noIndex {
+ s.index = int(e.I)
+ s.pindex = p
+ }
+ if e.N != final {
+ // Descend into the next state block, located e.H+n entries further on.
+ i, states, n = 0, states[int(e.H)+n:], int(e.N)
+ // Only commit the scanner state at a rune boundary or at the end of the input.
+ if p >= len(str) || utf8.RuneStart(str[p]) {
+ s.states, s.n, pr = states, n, p
+ }
+ } else {
+ s.done = true
+ return p
+ }
+ continue
+ } else if e.N == final && c <= e.H {
+ // Range match on a final entry: the result is e.I offset by the distance from e.L.
+ p++
+ s.done = true
+ s.index = int(c-e.L) + int(e.I)
+ s.pindex = p
+ return p
+ }
+ }
+ i++
+ }
+ // No final entry matched: report the last committed rune boundary.
+ return pr
+}
+
+// scan is a verbatim copy of ctScanner.scan.
+func (s *ctScannerString) scan(p int) int {
+ pr := p // the p at the rune start
+ str := s.s
+ states, n := s.states, s.n
+ // i walks the entries of the current state block; it is reset to 0 whenever we descend.
+ for i := 0; i < n && p < len(str); {
+ e := states[i]
+ c := str[p]
+ // TODO: a significant number of contractions are of a form that
+ // cannot match discontiguous UTF-8 in a normalized string. We could let
+ // a negative value of e.n mean that we can set s.done = true and avoid
+ // the need for additional matches.
+ if c >= e.L {
+ if e.L == c {
+ // Exact byte match: consume it and record the result if the entry has one.
+ p++
+ if e.I != noIndex {
+ s.index = int(e.I)
+ s.pindex = p
+ }
+ if e.N != final {
+ // Descend into the next state block, located e.H+n entries further on.
+ i, states, n = 0, states[int(e.H)+n:], int(e.N)
+ // Only commit the scanner state at a rune boundary or at the end of the input.
+ if p >= len(str) || utf8.RuneStart(str[p]) {
+ s.states, s.n, pr = states, n, p
+ }
+ } else {
+ s.done = true
+ return p
+ }
+ continue
+ } else if e.N == final && c <= e.H {
+ // Range match on a final entry: the result is e.I offset by the distance from e.L.
+ p++
+ s.done = true
+ s.index = int(c-e.L) + int(e.I)
+ s.pindex = p
+ return p
+ }
+ }
+ i++
+ }
+ // No final entry matched: report the last committed rune boundary.
+ return pr
+}
diff --git a/vendor/golang.org/x/text/internal/colltab/iter.go b/vendor/golang.org/x/text/internal/colltab/iter.go
new file mode 100644
index 0000000..c1b1ba8
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/colltab/iter.go
@@ -0,0 +1,178 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package colltab
+
+// An Iter incrementally converts chunks of the input text to collation
+// elements, while ensuring that the collation elements are in normalized order
+// (that is, they are in the order as if the input text were normalized first).
+type Iter struct {
+ Weighter Weighter // produces the collation elements for the input
+ Elems []Elem // buffer of collation elements produced so far
+ // N is the number of elements in Elems that will not be reordered on
+ // subsequent iterations, N <= len(Elems).
+ N int
+
+ bytes []byte // input set by SetInput; nil when the input is a string
+ str string // input set by SetInputString
+ // Because the Elems buffer may contain collation elements that are needed
+ // for look-ahead, we need two positions in the text (bytes or str): one for
+ // the end position in the text for the current iteration and one for the
+ // start of the next call to appendNext.
+ pEnd int // end position in text corresponding to N.
+ pNext int // pEnd <= pNext.
+}
+
+// Reset moves both text positions to p and empties the element buffer,
+// discarding any results obtained so far.
+func (i *Iter) Reset(p int) {
+	i.pEnd, i.pNext = p, p
+	i.N = 0
+	i.Elems = i.Elems[:0]
+}
+
+// Len returns the length in bytes of the current input text.
+func (i *Iter) Len() int {
+	if i.bytes == nil {
+		return len(i.str)
+	}
+	return len(i.bytes)
+}
+
+// Discard drops the first N collation elements by moving the remainder to
+// the front of the buffer, and resets N to 0.
+func (i *Iter) Discard() {
+	// TODO: change this such that only modifiers following starters will have
+	// to be copied.
+	kept := copy(i.Elems, i.Elems[i.N:])
+	i.Elems = i.Elems[:kept]
+	i.N = 0
+}
+
+// End returns the position in the input text up to which Next has returned
+// results.
+func (i *Iter) End() int {
+	end := i.pEnd
+	return end
+}
+
+// SetInput makes s the input of i, clears any string input, and resets the
+// iterator to position 0.
+func (i *Iter) SetInput(s []byte) {
+	i.str = ""
+	i.bytes = s
+	i.Reset(0)
+}
+
+// SetInputString makes s the input of i, clears any byte-slice input, and
+// resets the iterator to position 0.
+func (i *Iter) SetInputString(s string) {
+	i.bytes = nil
+	i.str = s
+	i.Reset(0)
+}
+
+// done reports whether pNext has reached the end of both the string and the
+// byte-slice input.
+func (i *Iter) done() bool {
+	remaining := i.pNext < len(i.str) || i.pNext < len(i.bytes)
+	return !remaining
+}
+
+// appendNext asks the Weighter for the collation elements at pNext, appends
+// them to Elems and advances pNext. It reports false when the input is
+// exhausted.
+func (i *Iter) appendNext() bool {
+	if i.done() {
+		return false
+	}
+	var consumed int
+	if i.bytes != nil {
+		i.Elems, consumed = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:])
+	} else {
+		i.Elems, consumed = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:])
+	}
+	// Always make progress, even if the Weighter reports zero bytes consumed.
+	if consumed == 0 {
+		consumed = 1
+	}
+	i.pNext += consumed
+	return true
+}
+
+// Next appends Elems to the internal array. On each iteration, it will either
+// add starters or modifiers. In the majority of cases, an Elem with a primary
+// value > 0 will have a CCC of 0. The CCC values of collation elements are also
+// used to detect if the input string was not normalized and to adjust the
+// result accordingly.
+func (i *Iter) Next() bool {
+ // Only fetch new elements once everything buffered has been handed out.
+ if i.N == len(i.Elems) && !i.appendNext() {
+ return false
+ }
+
+ // Check if the current segment starts with a starter.
+ // NOTE(review): the element inspected here is the last one in the buffer;
+ // an empty buffer after appendNext would make this index panic.
+ prevCCC := i.Elems[len(i.Elems)-1].CCC()
+ if prevCCC == 0 {
+ i.N = len(i.Elems)
+ i.pEnd = i.pNext
+ return true
+ } else if i.Elems[i.N].CCC() == 0 {
+ // set i.N to only cover part of i.Elems for which prevCCC == 0 and
+ // use rest for the next call to next.
+ for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ {
+ }
+ i.pEnd = i.pNext
+ return true
+ }
+
+ // The current (partial) segment starts with modifiers. We need to collect
+ // all successive modifiers to ensure that they are normalized.
+ for {
+ p := len(i.Elems) // index of the first element the next appendNext adds
+ i.pEnd = i.pNext
+ if !i.appendNext() {
+ break
+ }
+
+ if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters {
+ // Leave the starter for the next iteration. This ensures that we
+ // do not return sequences of collation elements that cross two
+ // segments.
+ //
+ // TODO: handle large number of combining characters by fully
+ // normalizing the input segment before iteration. This ensures
+ // results are consistent across the text repo.
+ i.N = p
+ return true
+ } else if ccc < prevCCC {
+ i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC.
+ } else {
+ prevCCC = ccc
+ }
+ }
+
+ // Input exhausted: report whether any elements remain to be returned.
+ done := len(i.Elems) != i.N
+ i.N = len(i.Elems)
+ return done
+}
+
+// nextNoNorm behaves like Next but leaves the collation elements in input
+// order: it appends the next batch and marks the whole buffer as final.
+func (i *Iter) nextNoNorm() bool {
+	// TODO: remove this function. Using this instead of next does not seem
+	// to improve performance in any significant way. We retain this until
+	// later for evaluation purposes.
+	if !i.done() {
+		i.appendNext()
+		i.N = len(i.Elems)
+		return true
+	}
+	return false
+}
+
+const maxCombiningCharacters = 30 // Next stops collecting modifiers once more than this many elements are pending
+
+// doNorm reorders the collation elements in i.Elems.
+// It assumes that blocks of collation elements added with appendNext
+// either start and end with the same CCC or start with CCC == 0.
+// This allows for a single insertion point for the entire block.
+// The correctness of this assumption is verified in builder.go.
+func (i *Iter) doNorm(p int, ccc uint8) {
+ n := len(i.Elems)
+ k := p // start of the block that has to move forward
+ // Walk p back to the insertion point: stop at i.N or at the first earlier
+ // element whose CCC is not greater than ccc.
+ for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- {
+ }
+ // Rotate in place: append a copy of Elems[p:k], shift Elems[k:] down to p,
+ // then cut the buffer back to its original length.
+ i.Elems = append(i.Elems, i.Elems[p:k]...)
+ copy(i.Elems[p:], i.Elems[k:])
+ i.Elems = i.Elems[:n]
+}
diff --git a/vendor/golang.org/x/text/internal/colltab/numeric.go b/vendor/golang.org/x/text/internal/colltab/numeric.go
new file mode 100644
index 0000000..38c255c
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/colltab/numeric.go
@@ -0,0 +1,236 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package colltab
+
+import (
+ "unicode"
+ "unicode/utf8"
+)
+
+// NewNumericWeighter wraps w to replace individual digits to sort based on their
+// numeric value.
+//
+// Weighter w must have a free primary weight after the primary weight for 9.
+// If this is not the case, numeric value will sort at the same primary level
+// as the first primary sorting after 9.
+func NewNumericWeighter(w Weighter) Weighter {
+	// getElem returns the first collation element w produces for s.
+	getElem := func(s string) Elem {
+		elems, _ := w.AppendNextString(nil, s)
+		return elems[0]
+	}
+	nine := getElem("9")
+
+	// Numbers should order before zero, but the DUCET has no room for this.
+	// TODO: move before zero once we use fractional collation elements.
+	// The error from MakeElem is deliberately dropped here.
+	ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0)
+
+	return &numericWeighter{
+		Weighter: w,
+
+		// We assume that w sorts digits of different kinds in order of numeric
+		// value and that the tertiary weight order is preserved.
+		//
+		// TODO: evaluate whether it is worth basing the ranges on the Elem
+		// encoding itself once the move to fractional weights is complete.
+		zero: getElem("0"),
+		// Written as an escape so that the full-width digit cannot be folded
+		// to ASCII "0", which would make the special range empty.
+		zeroSpecialLo: getElem("\uFF10"), // U+FF10 FULLWIDTH DIGIT ZERO
+		zeroSpecialHi: getElem("₀"),      // U+2080 SUBSCRIPT ZERO
+		nine:          nine,
+		nineSpecialHi: getElem("₉"), // U+2089 SUBSCRIPT NINE
+		numberStart:   ns,
+	}
+}
+
+// A numericWeighter translates a stream of digits into a stream of weights
+// representing the numeric value.
+type numericWeighter struct {
+ Weighter
+
+ // The Elems below all demarcate boundaries of specific ranges. With the
+ // current element encoding digits are in two ranges: normal (default
+ // tertiary value) and special. For most languages, digits have collation
+ // elements in the normal range.
+ //
+ // Note: the range tests are very specific for the element encoding used by
+ // this implementation. The tests in collate_test.go are designed to fail
+ // if this code is not updated when an encoding has changed.
+
+ zero Elem // normal digit zero
+ zeroSpecialLo Elem // special digit zero, low tertiary value
+ zeroSpecialHi Elem // special digit zero, high tertiary value
+ nine Elem // normal digit nine
+ nineSpecialHi Elem // special digit nine
+ numberStart Elem // marker placed in front of a digit run; its primary is that of nine plus one
+}
+
+// AppendNext calls the namesake of the underlying weigher, but replaces single
+// digits with weights representing their value. When the first element it
+// obtains is a digit, it keeps consuming input for as long as further digits
+// follow, and returns the converted elements together with the total number
+// of bytes consumed.
+func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
+	ce, n = nw.Weighter.AppendNext(buf, s)
+	nc := numberConverter{
+		elems: buf,
+		w:     nw,
+		b:     s,
+	}
+	isZero, ok := nc.checkNextDigit(ce)
+	if !ok {
+		return ce, n
+	}
+	// ce might have been grown already, so take it instead of buf.
+	nc.init(ce, len(buf), isZero)
+	for n < len(s) {
+		// Point the converter at the rune about to be weighed, so that
+		// isDigit inspects that rune and not the first rune of s.
+		nc.b = s[n:]
+		next, sz := nw.Weighter.AppendNext(nc.elems, nc.b)
+		n += sz
+		if !nc.update(next) {
+			break
+		}
+	}
+	return nc.result(), n
+}
+
+// AppendNextString calls the namesake of the underlying weigher, but replaces
+// single digits with weights representing their value. When the first element
+// it obtains is a digit, it keeps consuming input for as long as further
+// digits follow, and returns the converted elements together with the total
+// number of bytes consumed.
+func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) {
+	ce, n = nw.Weighter.AppendNextString(buf, s)
+	nc := numberConverter{
+		elems: buf,
+		w:     nw,
+		s:     s,
+	}
+	isZero, ok := nc.checkNextDigit(ce)
+	if !ok {
+		return ce, n
+	}
+	// ce might have been grown already, so take it instead of buf.
+	nc.init(ce, len(buf), isZero)
+	for n < len(s) {
+		// Point the converter at the rune about to be weighed, so that
+		// isDigit inspects that rune and not the first rune of s.
+		nc.s = s[n:]
+		next, sz := nw.Weighter.AppendNextString(nc.elems, nc.s)
+		n += sz
+		if !nc.update(next) {
+			break
+		}
+	}
+	return nc.result(), n
+}
+
+// numberConverter holds the state for rewriting one run of digits.
+type numberConverter struct {
+ w *numericWeighter
+
+ elems []Elem // elements accepted so far
+ nDigits int // digits counted so far; zeros seen before the first counted digit are not included
+ lenIndex int // index in elems of the placeholder that result fills in
+
+ s string // set if the input was of type string
+ b []byte // set if the input was of type []byte
+}
+
+// init completes initialization of a numberConverter and prepares it for adding
+// more digits. elems is assumed to have a digit starting at oldLen.
+func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
+ // Insert a marker indicating the start of a number and a placeholder
+ // for the number of digits.
+ if isZero {
+ // A leading zero is dropped: its elements are replaced by marker and placeholder.
+ elems = append(elems[:oldLen], nc.w.numberStart, 0)
+ } else {
+ // Make room for two elements in front of the digit and shift the digit up.
+ elems = append(elems, 0, 0)
+ copy(elems[oldLen+2:], elems[oldLen:])
+ elems[oldLen] = nc.w.numberStart
+ elems[oldLen+1] = 0
+
+ nc.nDigits = 1
+ }
+ nc.elems = elems
+ nc.lenIndex = oldLen + 1
+}
+
+// checkNextDigit reports whether bufNew adds a single digit relative to the old
+// buffer. If it does, it also reports whether this digit is zero.
+func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) {
+ // Nothing was added relative to the old buffer.
+ if len(nc.elems) >= len(bufNew) {
+ return false, false
+ }
+ e := bufNew[len(nc.elems)] // first newly added element
+ if e < nc.w.zeroSpecialLo || nc.w.nine < e {
+ // Not a number.
+ return false, false
+ }
+ if e < nc.w.zero {
+ // Candidate in the special range, which lies below the normal digits.
+ if e > nc.w.nineSpecialHi {
+ // Not a number.
+ return false, false
+ }
+ if !nc.isDigit() {
+ return false, false
+ }
+ isZero = e <= nc.w.zeroSpecialHi
+ } else {
+ // This is the common case if we encounter a digit.
+ isZero = e == nc.w.zero
+ }
+ // Test the remaining added collation elements have a zero primary value.
+ if n := len(bufNew) - len(nc.elems); n > 1 {
+ for i := len(nc.elems) + 1; i < len(bufNew); i++ {
+ if bufNew[i].Primary() != 0 {
+ return false, false
+ }
+ }
+ // In some rare cases, collation elements will encode runes in
+ // unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371)
+ // are not in Nd. Also some digits that clearly belong in unicode.No,
+ // like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have
+ // collation elements indistinguishable from normal digits.
+ // Unfortunately, this means we need to make this check for nearly all
+ // non-Latin digits.
+ //
+ // TODO: check the performance impact and find something better if it is
+ // an issue.
+ if !nc.isDigit() {
+ return false, false
+ }
+ }
+ return isZero, true
+}
+
+func (nc *numberConverter) isDigit() bool {
+ if nc.b != nil {
+ r, _ := utf8.DecodeRune(nc.b)
+ return unicode.In(r, unicode.Nd)
+ }
+ r, _ := utf8.DecodeRuneInString(nc.s)
+ return unicode.In(r, unicode.Nd)
+}
+
+// We currently support a maximum of about 2M digits (the number of primary
+// values). Such numbers will compare correctly against small numbers, but their
+// comparison against other large numbers is undefined.
+//
+// TODO: define a proper fallback, such as comparing large numbers textually or
+// actually allowing numbers of unlimited length.
+//
+// TODO: cap this to a lower number (like 100) and maybe allow a larger number
+// in an option?
+const maxDigits = 1<<maxPrimaryBits - 1 // result stores the digit count as a primary weight
+
+// update examines the elements added in elems and reports whether the digit
+// run should continue. A zero seen before any counted digit is skipped: its
+// elements are not adopted into nc.elems.
+func (nc *numberConverter) update(elems []Elem) bool {
+ isZero, ok := nc.checkNextDigit(elems)
+ if nc.nDigits == 0 && isZero {
+ return true
+ }
+ // The new elements are adopted even when they are not a digit; the caller stops after this call in that case.
+ nc.elems = elems
+ if !ok {
+ return false
+ }
+ nc.nDigits++
+ return nc.nDigits < maxDigits
+}
+
+// result stores the digit count, encoded as an Elem with default secondary
+// and tertiary weights, in the placeholder slot and returns the finished
+// collation elements.
+func (nc *numberConverter) result() []Elem {
+	count, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
+	nc.elems[nc.lenIndex] = count
+	return nc.elems
+}
diff --git a/vendor/golang.org/x/text/internal/colltab/table.go b/vendor/golang.org/x/text/internal/colltab/table.go
new file mode 100644
index 0000000..e26e36d
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/colltab/table.go
@@ -0,0 +1,275 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package colltab
+
+import (
+ "unicode/utf8"
+
+ "golang.org/x/text/unicode/norm"
+)
+
+// Table holds all collation data for a given collation ordering.
+type Table struct {
+ Index Trie // main trie
+
+ // expansion info
+ ExpandElem []uint32
+
+ // contraction info
+ ContractTries ContractTrieSet
+ ContractElem []uint32
+ MaxContractLen int
+ VariableTop uint32
+}
+
+func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
+ return t.appendNext(w, source{bytes: b})
+}
+
+func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
+ return t.appendNext(w, source{str: s})
+}
+
+func (t *Table) Start(p int, b []byte) int {
+ // TODO: implement
+ panic("not implemented")
+}
+
+func (t *Table) StartString(p int, s string) int {
+ // TODO: implement
+ panic("not implemented")
+}
+
+func (t *Table) Domain() []string {
+ // TODO: implement
+ panic("not implemented")
+}
+
+func (t *Table) Top() uint32 {
+ return t.VariableTop
+}
+
+type source struct {
+ str string
+ bytes []byte
+}
+
+func (src *source) lookup(t *Table) (ce Elem, sz int) {
+ if src.bytes == nil {
+ return t.Index.lookupString(src.str)
+ }
+ return t.Index.lookup(src.bytes)
+}
+
+func (src *source) tail(sz int) {
+ if src.bytes == nil {
+ src.str = src.str[sz:]
+ } else {
+ src.bytes = src.bytes[sz:]
+ }
+}
+
+func (src *source) nfd(buf []byte, end int) []byte {
+ if src.bytes == nil {
+ return norm.NFD.AppendString(buf[:0], src.str[:end])
+ }
+ return norm.NFD.Append(buf[:0], src.bytes[:end]...)
+}
+
+func (src *source) rune() (r rune, sz int) {
+ if src.bytes == nil {
+ return utf8.DecodeRuneInString(src.str)
+ }
+ return utf8.DecodeRune(src.bytes)
+}
+
+func (src *source) properties(f norm.Form) norm.Properties {
+ if src.bytes == nil {
+ return f.PropertiesString(src.str)
+ }
+ return f.Properties(src.bytes)
+}
+
+// appendNext appends the weights corresponding to the next rune or
+// contraction in src. If a contraction is matched to a discontinuous
+// sequence of runes, the weights for the interstitial runes are
+// appended as well. It returns a new slice that includes the appended
+// weights and the number of bytes consumed from src.
+func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
+ ce, sz := src.lookup(t)
+ tp := ce.ctype()
+ if tp == ceNormal {
+ if ce == 0 {
+ r, _ := src.rune()
+ const (
+ hangulSize = 3
+ firstHangul = 0xAC00
+ lastHangul = 0xD7A3
+ )
+ if r >= firstHangul && r <= lastHangul {
+ // TODO: performance can be considerably improved here.
+ n = sz
+ var buf [16]byte // Used for decomposing Hangul.
+ for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
+ ce, sz = t.Index.lookup(b)
+ w = append(w, ce)
+ }
+ return w, n
+ }
+ ce = makeImplicitCE(implicitPrimary(r))
+ }
+ w = append(w, ce)
+ } else if tp == ceExpansionIndex {
+ w = t.appendExpansion(w, ce)
+ } else if tp == ceContractionIndex {
+ n := 0
+ src.tail(sz)
+ if src.bytes == nil {
+ w, n = t.matchContractionString(w, ce, src.str)
+ } else {
+ w, n = t.matchContraction(w, ce, src.bytes)
+ }
+ sz += n
+ } else if tp == ceDecompose {
+ // Decompose using NFKD and replace tertiary weights.
+ t1, t2 := splitDecompose(ce)
+ i := len(w)
+ nfkd := src.properties(norm.NFKD).Decomposition()
+ for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
+ w, p = t.appendNext(w, source{bytes: nfkd})
+ }
+ w[i] = w[i].updateTertiary(t1)
+ if i++; i < len(w) {
+ w[i] = w[i].updateTertiary(t2)
+ for i++; i < len(w); i++ {
+ w[i] = w[i].updateTertiary(maxTertiary)
+ }
+ }
+ }
+ return w, sz
+}
+
+func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
+ i := splitExpandIndex(ce)
+ n := int(t.ExpandElem[i])
+ i++
+ for _, ce := range t.ExpandElem[i : i+n] {
+ w = append(w, Elem(ce))
+ }
+ return w
+}
+
+func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
+ index, n, offset := splitContractIndex(ce)
+
+ scan := t.ContractTries.scanner(index, n, suffix)
+ buf := [norm.MaxSegmentSize]byte{}
+ bufp := 0
+ p := scan.scan(0)
+
+ if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
+ // By now we should have filtered most cases.
+ p0 := p
+ bufn := 0
+ rune := norm.NFD.Properties(suffix[p:])
+ p += rune.Size()
+ if rune.LeadCCC() != 0 {
+ prevCC := rune.TrailCCC()
+ // A gap may only occur in the last normalization segment.
+ // This also ensures that len(scan.s) < norm.MaxSegmentSize.
+ if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
+ scan.s = suffix[:p+end]
+ }
+ for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
+ rune = norm.NFD.Properties(suffix[p:])
+ if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
+ break
+ }
+ prevCC = rune.TrailCCC()
+ if pp := scan.scan(p); pp != p {
+ // Copy the interstitial runes for later processing.
+ bufn += copy(buf[bufn:], suffix[p0:p])
+ if scan.pindex == pp {
+ bufp = bufn
+ }
+ p, p0 = pp, pp
+ } else {
+ p += rune.Size()
+ }
+ }
+ }
+ }
+ // Append weights for the matched contraction, which may be an expansion.
+ i, n := scan.result()
+ ce = Elem(t.ContractElem[i+offset])
+ if ce.ctype() == ceNormal {
+ w = append(w, ce)
+ } else {
+ w = t.appendExpansion(w, ce)
+ }
+ // Append weights for the runes in the segment not part of the contraction.
+ for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
+ w, p = t.appendNext(w, source{bytes: b})
+ }
+ return w, n
+}
+
+// TODO: unify the two implementations. This is best done after first simplifying
+// the algorithm taking into account the inclusion of both NFC and NFD forms
+// in the table.
+func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
+ index, n, offset := splitContractIndex(ce)
+
+ scan := t.ContractTries.scannerString(index, n, suffix)
+ buf := [norm.MaxSegmentSize]byte{}
+ bufp := 0
+ p := scan.scan(0)
+
+ if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
+ // By now we should have filtered most cases.
+ p0 := p
+ bufn := 0
+ rune := norm.NFD.PropertiesString(suffix[p:])
+ p += rune.Size()
+ if rune.LeadCCC() != 0 {
+ prevCC := rune.TrailCCC()
+ // A gap may only occur in the last normalization segment.
+ // This also ensures that len(scan.s) < norm.MaxSegmentSize.
+ if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
+ scan.s = suffix[:p+end]
+ }
+ for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
+ rune = norm.NFD.PropertiesString(suffix[p:])
+ if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
+ break
+ }
+ prevCC = rune.TrailCCC()
+ if pp := scan.scan(p); pp != p {
+ // Copy the interstitial runes for later processing.
+ bufn += copy(buf[bufn:], suffix[p0:p])
+ if scan.pindex == pp {
+ bufp = bufn
+ }
+ p, p0 = pp, pp
+ } else {
+ p += rune.Size()
+ }
+ }
+ }
+ }
+ // Append weights for the matched contraction, which may be an expansion.
+ i, n := scan.result()
+ ce = Elem(t.ContractElem[i+offset])
+ if ce.ctype() == ceNormal {
+ w = append(w, ce)
+ } else {
+ w = t.appendExpansion(w, ce)
+ }
+ // Append weights for the runes in the segment not part of the contraction.
+ for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
+ w, p = t.appendNext(w, source{bytes: b})
+ }
+ return w, n
+}
diff --git a/vendor/golang.org/x/text/internal/colltab/trie.go b/vendor/golang.org/x/text/internal/colltab/trie.go
new file mode 100644
index 0000000..a0eaa0d
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/colltab/trie.go
@@ -0,0 +1,159 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The trie in this file is used to associate the first full character in a
+// UTF-8 string with a collation element. All but the last byte in a UTF-8 byte
+// sequence are used to look up offsets in the index table to be used for the
+// next byte. The last byte is used to index into a table of collation elements.
+// For a full description, see go.text/collate/build/trie.go.
+
+package colltab
+
+const blockSize = 64
+
+type Trie struct {
+ Index0 []uint16 // index for first byte (0xC0-0xFF)
+ Values0 []uint32 // values for first byte (0x00-0x7F)
+ Index []uint16
+ Values []uint32
+}
+
+const (
+ t1 = 0x00 // 0000 0000
+ tx = 0x80 // 1000 0000
+ t2 = 0xC0 // 1100 0000
+ t3 = 0xE0 // 1110 0000
+ t4 = 0xF0 // 1111 0000
+ t5 = 0xF8 // 1111 1000
+ t6 = 0xFC // 1111 1100
+ te = 0xFE // 1111 1110
+)
+
+func (t *Trie) lookupValue(n uint16, b byte) Elem {
+ return Elem(t.Values[int(n)<<6+int(b)])
+}
+
+// lookup returns the trie value for the first UTF-8 encoding in s and
+// the width in bytes of this encoding. The size will be 0 if s does not
+// hold enough bytes to complete the encoding. len(s) must be greater than 0.
+func (t *Trie) lookup(s []byte) (v Elem, sz int) {
+ c0 := s[0]
+ switch {
+ case c0 < tx:
+ return Elem(t.Values0[c0]), 1
+ case c0 < t2:
+ return 0, 1
+ case c0 < t3:
+ if len(s) < 2 {
+ return 0, 0
+ }
+ i := t.Index0[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ return t.lookupValue(i, c1), 2
+ case c0 < t4:
+ if len(s) < 3 {
+ return 0, 0
+ }
+ i := t.Index0[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ o := int(i)<<6 + int(c1)
+ i = t.Index[o]
+ c2 := s[2]
+ if c2 < tx || t2 <= c2 {
+ return 0, 2
+ }
+ return t.lookupValue(i, c2), 3
+ case c0 < t5:
+ if len(s) < 4 {
+ return 0, 0
+ }
+ i := t.Index0[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ o := int(i)<<6 + int(c1)
+ i = t.Index[o]
+ c2 := s[2]
+ if c2 < tx || t2 <= c2 {
+ return 0, 2
+ }
+ o = int(i)<<6 + int(c2)
+ i = t.Index[o]
+ c3 := s[3]
+ if c3 < tx || t2 <= c3 {
+ return 0, 3
+ }
+ return t.lookupValue(i, c3), 4
+ }
+ // Illegal rune
+ return 0, 1
+}
+
+// The body of lookupString is a verbatim copy of that of lookup.
+func (t *Trie) lookupString(s string) (v Elem, sz int) {
+ c0 := s[0]
+ switch {
+ case c0 < tx:
+ return Elem(t.Values0[c0]), 1
+ case c0 < t2:
+ return 0, 1
+ case c0 < t3:
+ if len(s) < 2 {
+ return 0, 0
+ }
+ i := t.Index0[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ return t.lookupValue(i, c1), 2
+ case c0 < t4:
+ if len(s) < 3 {
+ return 0, 0
+ }
+ i := t.Index0[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ o := int(i)<<6 + int(c1)
+ i = t.Index[o]
+ c2 := s[2]
+ if c2 < tx || t2 <= c2 {
+ return 0, 2
+ }
+ return t.lookupValue(i, c2), 3
+ case c0 < t5:
+ if len(s) < 4 {
+ return 0, 0
+ }
+ i := t.Index0[c0]
+ c1 := s[1]
+ if c1 < tx || t2 <= c1 {
+ return 0, 1
+ }
+ o := int(i)<<6 + int(c1)
+ i = t.Index[o]
+ c2 := s[2]
+ if c2 < tx || t2 <= c2 {
+ return 0, 2
+ }
+ o = int(i)<<6 + int(c2)
+ i = t.Index[o]
+ c3 := s[3]
+ if c3 < tx || t2 <= c3 {
+ return 0, 3
+ }
+ return t.lookupValue(i, c3), 4
+ }
+ // Illegal rune
+ return 0, 1
+}
diff --git a/vendor/golang.org/x/text/internal/colltab/weighter.go b/vendor/golang.org/x/text/internal/colltab/weighter.go
new file mode 100644
index 0000000..f1ec45f
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/colltab/weighter.go
@@ -0,0 +1,31 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package colltab // import "golang.org/x/text/internal/colltab"
+
+// A Weighter can be used as a source for Collator and Searcher.
+type Weighter interface {
+ // Start finds the start of the segment that includes position p.
+ Start(p int, b []byte) int
+
+ // StartString finds the start of the segment that includes position p.
+ StartString(p int, s string) int
+
+ // AppendNext appends Elems to buf corresponding to the longest match
+ // of a single character or contraction from the start of s.
+ // It returns the new buf and the number of bytes consumed.
+ AppendNext(buf []Elem, s []byte) (ce []Elem, n int)
+
+ // AppendNextString appends Elems to buf corresponding to the longest match
+ // of a single character or contraction from the start of s.
+ // It returns the new buf and the number of bytes consumed.
+ AppendNextString(buf []Elem, s string) (ce []Elem, n int)
+
+ // Domain returns a slice of all single characters and contractions for which
+ // collation elements are defined in this table.
+ Domain() []string
+
+ // Top returns the highest variable primary value.
+ Top() uint32
+}
diff --git a/vendor/golang.org/x/text/internal/tag/tag.go b/vendor/golang.org/x/text/internal/tag/tag.go
new file mode 100644
index 0000000..b5d3488
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/tag/tag.go
@@ -0,0 +1,100 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package tag contains functionality for handling tags and related data.
+package tag // import "golang.org/x/text/internal/tag"
+
+import "sort"
+
+// An Index converts tags to a compact numeric value.
+//
+// All elements are of size 4. Tags may be up to 4 bytes long. Excess bytes can
+// be used to store additional information about the tag.
+type Index string
+
+// Elem returns the element data at the given index.
+func (s Index) Elem(x int) string {
+ return string(s[x*4 : x*4+4])
+}
+
+// Index reports the index of the given key or -1 if it could not be found.
+// Only the first len(key) bytes from the start of the 4-byte entries will be
+// considered for the search and the first match in Index will be returned.
+func (s Index) Index(key []byte) int {
+ n := len(key)
+ // search the index of the first entry with an equal or higher value than
+ // key in s.
+ index := sort.Search(len(s)/4, func(i int) bool {
+ return cmp(s[i*4:i*4+n], key) != -1
+ })
+ i := index * 4
+ if cmp(s[i:i+len(key)], key) != 0 {
+ return -1
+ }
+ return index
+}
+
+// Next finds the next occurrence of key after index x, which must have been
+// obtained from a call to Index using the same key. It returns x+1 or -1.
+func (s Index) Next(key []byte, x int) int {
+ if x++; x*4 < len(s) && cmp(s[x*4:x*4+len(key)], key) == 0 {
+ return x
+ }
+ return -1
+}
+
+// cmp returns an integer comparing a and b lexicographically.
+func cmp(a Index, b []byte) int {
+ n := len(a)
+ if len(b) < n {
+ n = len(b)
+ }
+ for i, c := range b[:n] {
+ switch {
+ case a[i] > c:
+ return 1
+ case a[i] < c:
+ return -1
+ }
+ }
+ switch {
+ case len(a) < len(b):
+ return -1
+ case len(a) > len(b):
+ return 1
+ }
+ return 0
+}
+
+// Compare returns an integer comparing a and b lexicographically.
+func Compare(a string, b []byte) int {
+ return cmp(Index(a), b)
+}
+
+// FixCase reformats b to the same pattern of cases as form.
+// It returns false if b is malformed.
+func FixCase(form string, b []byte) bool {
+ if len(form) != len(b) {
+ return false
+ }
+ for i, c := range b {
+ if form[i] <= 'Z' {
+ if c >= 'a' {
+ c -= 'z' - 'Z'
+ }
+ if c < 'A' || 'Z' < c {
+ return false
+ }
+ } else {
+ if c <= 'Z' {
+ c += 'z' - 'Z'
+ }
+ if c < 'a' || 'z' < c {
+ return false
+ }
+ }
+ b[i] = c
+ }
+ return true
+}
diff --git a/vendor/golang.org/x/text/internal/ucd/ucd.go b/vendor/golang.org/x/text/internal/ucd/ucd.go
index 309e8d8..8c45b5f 100644
--- a/vendor/golang.org/x/text/internal/ucd/ucd.go
+++ b/vendor/golang.org/x/text/internal/ucd/ucd.go
@@ -11,8 +11,8 @@ package ucd // import "golang.org/x/text/internal/ucd"
import (
"bufio"
- "bytes"
"errors"
+ "fmt"
"io"
"log"
"regexp"
@@ -92,10 +92,11 @@ type Parser struct {
keepRanges bool // Don't expand rune ranges in field 0.
err error
- comment []byte
- field [][]byte
+ comment string
+ field []string
// parsedRange is needed in case Range(0) is called more than once for one
// field. In some cases this requires scanning ahead.
+ line int
parsedRange bool
rangeStart, rangeEnd rune
@@ -103,15 +104,19 @@ type Parser struct {
commentHandler func(s string)
}
-func (p *Parser) setError(err error) {
- if p.err == nil {
- p.err = err
+func (p *Parser) setError(err error, msg string) {
+ if p.err == nil && err != nil {
+ if msg == "" {
+ p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err)
+ } else {
+ p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err)
+ }
}
}
-func (p *Parser) getField(i int) []byte {
+func (p *Parser) getField(i int) string {
if i >= len(p.field) {
- return nil
+ return ""
}
return p.field[i]
}
@@ -139,65 +144,66 @@ func (p *Parser) Next() bool {
p.rangeStart++
return true
}
- p.comment = nil
+ p.comment = ""
p.field = p.field[:0]
p.parsedRange = false
- for p.scanner.Scan() {
- b := p.scanner.Bytes()
- if len(b) == 0 {
+ for p.scanner.Scan() && p.err == nil {
+ p.line++
+ s := p.scanner.Text()
+ if s == "" {
continue
}
- if b[0] == '#' {
+ if s[0] == '#' {
if p.commentHandler != nil {
- p.commentHandler(strings.TrimSpace(string(b[1:])))
+ p.commentHandler(strings.TrimSpace(s[1:]))
}
continue
}
// Parse line
- if i := bytes.IndexByte(b, '#'); i != -1 {
- p.comment = bytes.TrimSpace(b[i+1:])
- b = b[:i]
+ if i := strings.IndexByte(s, '#'); i != -1 {
+ p.comment = strings.TrimSpace(s[i+1:])
+ s = s[:i]
}
- if b[0] == '@' {
+ if s[0] == '@' {
if p.partHandler != nil {
- p.field = append(p.field, bytes.TrimSpace(b[1:]))
+ p.field = append(p.field, strings.TrimSpace(s[1:]))
p.partHandler(p)
p.field = p.field[:0]
}
- p.comment = nil
+ p.comment = ""
continue
}
for {
- i := bytes.IndexByte(b, ';')
+ i := strings.IndexByte(s, ';')
if i == -1 {
- p.field = append(p.field, bytes.TrimSpace(b))
+ p.field = append(p.field, strings.TrimSpace(s))
break
}
- p.field = append(p.field, bytes.TrimSpace(b[:i]))
- b = b[i+1:]
+ p.field = append(p.field, strings.TrimSpace(s[:i]))
+ s = s[i+1:]
}
if !p.keepRanges {
p.rangeStart, p.rangeEnd = p.getRange(0)
}
return true
}
- p.setError(p.scanner.Err())
+ p.setError(p.scanner.Err(), "scanner failed")
return false
}
-func parseRune(b []byte) (rune, error) {
+func parseRune(b string) (rune, error) {
if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
b = b[2:]
}
- x, err := strconv.ParseUint(string(b), 16, 32)
+ x, err := strconv.ParseUint(b, 16, 32)
return rune(x), err
}
-func (p *Parser) parseRune(b []byte) rune {
- x, err := parseRune(b)
- p.setError(err)
+func (p *Parser) parseRune(s string) rune {
+ x, err := parseRune(s)
+ p.setError(err, "failed to parse rune")
return x
}
@@ -211,13 +217,13 @@ func (p *Parser) Rune(i int) rune {
// Runes interprets and returns field i as a sequence of runes.
func (p *Parser) Runes(i int) (runes []rune) {
- add := func(b []byte) {
- if b = bytes.TrimSpace(b); len(b) > 0 {
- runes = append(runes, p.parseRune(b))
+ add := func(s string) {
+ if s = strings.TrimSpace(s); len(s) > 0 {
+ runes = append(runes, p.parseRune(s))
}
}
for b := p.getField(i); ; {
- i := bytes.IndexByte(b, ' ')
+ i := strings.IndexByte(b, ' ')
if i == -1 {
add(b)
break
@@ -247,7 +253,7 @@ func (p *Parser) Range(i int) (first, last rune) {
func (p *Parser) getRange(i int) (first, last rune) {
b := p.getField(i)
- if k := bytes.Index(b, []byte("..")); k != -1 {
+ if k := strings.Index(b, ".."); k != -1 {
return p.parseRune(b[:k]), p.parseRune(b[k+2:])
}
// The first field may not be a rune, in which case we may ignore any error
@@ -260,23 +266,24 @@ func (p *Parser) getRange(i int) (first, last rune) {
p.keepRanges = true
}
// Special case for UnicodeData that was retained for backwards compatibility.
- if i == 0 && len(p.field) > 1 && bytes.HasSuffix(p.field[1], []byte("First>")) {
+ if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") {
if p.parsedRange {
return p.rangeStart, p.rangeEnd
}
mf := reRange.FindStringSubmatch(p.scanner.Text())
+ p.line++
if mf == nil || !p.scanner.Scan() {
- p.setError(errIncorrectLegacyRange)
+ p.setError(errIncorrectLegacyRange, "")
return x, x
}
// Using Bytes would be more efficient here, but Text is a lot easier
// and this is not a frequent case.
ml := reRange.FindStringSubmatch(p.scanner.Text())
if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
- p.setError(errIncorrectLegacyRange)
+ p.setError(errIncorrectLegacyRange, "")
return x, x
}
- p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Bytes()[:len(ml[1])])
+ p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])])
p.parsedRange = true
return p.rangeStart, p.rangeEnd
}
@@ -298,34 +305,34 @@ var bools = map[string]bool{
// Bool parses and returns field i as a boolean value.
func (p *Parser) Bool(i int) bool {
- b := p.getField(i)
+ f := p.getField(i)
for s, v := range bools {
- if bstrEq(b, s) {
+ if f == s {
return v
}
}
- p.setError(strconv.ErrSyntax)
+ p.setError(strconv.ErrSyntax, "error parsing bool")
return false
}
// Int parses and returns field i as an integer value.
func (p *Parser) Int(i int) int {
x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
- p.setError(err)
+ p.setError(err, "error parsing int")
return int(x)
}
// Uint parses and returns field i as an unsigned integer value.
func (p *Parser) Uint(i int) uint {
x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
- p.setError(err)
+ p.setError(err, "error parsing uint")
return uint(x)
}
// Float parses and returns field i as a decimal value.
func (p *Parser) Float(i int) float64 {
x, err := strconv.ParseFloat(string(p.getField(i)), 64)
- p.setError(err)
+ p.setError(err, "error parsing float")
return x
}
@@ -353,24 +360,12 @@ var errUndefinedEnum = errors.New("ucd: undefined enum value")
// Enum interprets and returns field i as a value that must be one of the values
// in enum.
func (p *Parser) Enum(i int, enum ...string) string {
- b := p.getField(i)
+ f := p.getField(i)
for _, s := range enum {
- if bstrEq(b, s) {
+ if f == s {
return s
}
}
- p.setError(errUndefinedEnum)
+ p.setError(errUndefinedEnum, "error parsing enum")
return ""
}
-
-func bstrEq(b []byte, s string) bool {
- if len(b) != len(s) {
- return false
- }
- for i, c := range b {
- if c != s[i] {
- return false
- }
- }
- return true
-}