diff options
author | Niall Sheridan <nsheridan@gmail.com> | 2018-06-20 22:39:07 +0100 |
---|---|---|
committer | Niall Sheridan <nsheridan@gmail.com> | 2018-06-20 22:39:07 +0100 |
commit | de6d2c524430287c699aaa898c1325da6afea539 (patch) | |
tree | f78eb841208d667668a7bc92a9290d693cc7103b /vendor/golang.org/x/text/internal | |
parent | eb99016e1629e690e55633de6fc63a14c53e7ea2 (diff) |
Update dependencies
Diffstat (limited to 'vendor/golang.org/x/text/internal')
-rw-r--r-- | vendor/golang.org/x/text/internal/colltab/collelem.go | 371 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/colltab/colltab.go | 105 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/colltab/contract.go | 145 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/colltab/iter.go | 178 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/colltab/numeric.go | 236 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/colltab/table.go | 275 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/colltab/trie.go | 159 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/colltab/weighter.go | 31 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/gen/code.go | 351 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/gen/gen.go | 281 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/tag/tag.go | 100 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/triegen/compact.go | 58 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/triegen/print.go | 251 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/triegen/triegen.go | 494 | ||||
-rw-r--r-- | vendor/golang.org/x/text/internal/ucd/ucd.go | 371 |
15 files changed, 0 insertions, 3406 deletions
diff --git a/vendor/golang.org/x/text/internal/colltab/collelem.go b/vendor/golang.org/x/text/internal/colltab/collelem.go deleted file mode 100644 index 2855589..0000000 --- a/vendor/golang.org/x/text/internal/colltab/collelem.go +++ /dev/null @@ -1,371 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package colltab - -import ( - "fmt" - "unicode" -) - -// Level identifies the collation comparison level. -// The primary level corresponds to the basic sorting of text. -// The secondary level corresponds to accents and related linguistic elements. -// The tertiary level corresponds to casing and related concepts. -// The quaternary level is derived from the other levels by the -// various algorithms for handling variable elements. -type Level int - -const ( - Primary Level = iota - Secondary - Tertiary - Quaternary - Identity - - NumLevels -) - -const ( - defaultSecondary = 0x20 - defaultTertiary = 0x2 - maxTertiary = 0x1F - MaxQuaternary = 0x1FFFFF // 21 bits. -) - -// Elem is a representation of a collation element. This API provides ways to encode -// and decode Elems. Implementations of collation tables may use values greater -// or equal to PrivateUse for their own purposes. However, these should never be -// returned by AppendNext. 
-type Elem uint32 - -const ( - maxCE Elem = 0xAFFFFFFF - PrivateUse = minContract - minContract = 0xC0000000 - maxContract = 0xDFFFFFFF - minExpand = 0xE0000000 - maxExpand = 0xEFFFFFFF - minDecomp = 0xF0000000 -) - -type ceType int - -const ( - ceNormal ceType = iota // ceNormal includes implicits (ce == 0) - ceContractionIndex // rune can be a start of a contraction - ceExpansionIndex // rune expands into a sequence of collation elements - ceDecompose // rune expands using NFKC decomposition -) - -func (ce Elem) ctype() ceType { - if ce <= maxCE { - return ceNormal - } - if ce <= maxContract { - return ceContractionIndex - } else { - if ce <= maxExpand { - return ceExpansionIndex - } - return ceDecompose - } - panic("should not reach here") - return ceType(-1) -} - -// For normal collation elements, we assume that a collation element either has -// a primary or non-default secondary value, not both. -// Collation elements with a primary value are of the form -// 01pppppp pppppppp ppppppp0 ssssssss -// - p* is primary collation value -// - s* is the secondary collation value -// 00pppppp pppppppp ppppppps sssttttt, where -// - p* is primary collation value -// - s* offset of secondary from default value. 
-// - t* is the tertiary collation value -// 100ttttt cccccccc pppppppp pppppppp -// - t* is the tertiar collation value -// - c* is the canonical combining class -// - p* is the primary collation value -// Collation elements with a secondary value are of the form -// 1010cccc ccccssss ssssssss tttttttt, where -// - c* is the canonical combining class -// - s* is the secondary collation value -// - t* is the tertiary collation value -// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 -// - q* quaternary value -const ( - ceTypeMask = 0xC0000000 - ceTypeMaskExt = 0xE0000000 - ceIgnoreMask = 0xF00FFFFF - ceType1 = 0x40000000 - ceType2 = 0x00000000 - ceType3or4 = 0x80000000 - ceType4 = 0xA0000000 - ceTypeQ = 0xC0000000 - Ignore = ceType4 - firstNonPrimary = 0x80000000 - lastSpecialPrimary = 0xA0000000 - secondaryMask = 0x80000000 - hasTertiaryMask = 0x40000000 - primaryValueMask = 0x3FFFFE00 - maxPrimaryBits = 21 - compactPrimaryBits = 16 - maxSecondaryBits = 12 - maxTertiaryBits = 8 - maxCCCBits = 8 - maxSecondaryCompactBits = 8 - maxSecondaryDiffBits = 4 - maxTertiaryCompactBits = 5 - primaryShift = 9 - compactSecondaryShift = 5 - minCompactSecondary = defaultSecondary - 4 -) - -func makeImplicitCE(primary int) Elem { - return ceType1 | Elem(primary<<primaryShift) | defaultSecondary -} - -// MakeElem returns an Elem for the given values. It will return an error -// if the given combination of values is invalid. 
-func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) { - if w := primary; w >= 1<<maxPrimaryBits || w < 0 { - return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits) - } - if w := secondary; w >= 1<<maxSecondaryBits || w < 0 { - return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits) - } - if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 { - return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits) - } - ce := Elem(0) - if primary != 0 { - if ccc != 0 { - if primary >= 1<<compactPrimaryBits { - return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits) - } - if secondary != defaultSecondary { - return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc) - } - ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits)) - ce |= Elem(ccc) << compactPrimaryBits - ce |= Elem(primary) - ce |= ceType3or4 - } else if tertiary == defaultTertiary { - if secondary >= 1<<maxSecondaryCompactBits { - return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits) - } - ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary) - ce |= ceType1 - } else { - d := secondary - defaultSecondary + maxSecondaryDiffBits - if d >= 1<<maxSecondaryDiffBits || d < 0 { - return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits) - } - if tertiary >= 1<<maxTertiaryCompactBits { - return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits) - } - ce = Elem(primary<<maxSecondaryDiffBits + d) - ce = ce<<maxTertiaryCompactBits + Elem(tertiary) - } - } else { - ce = Elem(secondary<<maxTertiaryBits + tertiary) - ce += Elem(ccc) << (maxSecondaryBits + 
maxTertiaryBits) - ce |= ceType4 - } - return ce, nil -} - -// MakeQuaternary returns an Elem with the given quaternary value. -func MakeQuaternary(v int) Elem { - return ceTypeQ | Elem(v<<primaryShift) -} - -// Mask sets weights for any level smaller than l to 0. -// The resulting Elem can be used to test for equality with -// other Elems to which the same mask has been applied. -func (ce Elem) Mask(l Level) uint32 { - return 0 -} - -// CCC returns the canonical combining class associated with the underlying character, -// if applicable, or 0 otherwise. -func (ce Elem) CCC() uint8 { - if ce&ceType3or4 != 0 { - if ce&ceType4 == ceType3or4 { - return uint8(ce >> 16) - } - return uint8(ce >> 20) - } - return 0 -} - -// Primary returns the primary collation weight for ce. -func (ce Elem) Primary() int { - if ce >= firstNonPrimary { - if ce > lastSpecialPrimary { - return 0 - } - return int(uint16(ce)) - } - return int(ce&primaryValueMask) >> primaryShift -} - -// Secondary returns the secondary collation weight for ce. -func (ce Elem) Secondary() int { - switch ce & ceTypeMask { - case ceType1: - return int(uint8(ce)) - case ceType2: - return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF) - case ceType3or4: - if ce < ceType4 { - return defaultSecondary - } - return int(ce>>8) & 0xFFF - case ceTypeQ: - return 0 - } - panic("should not reach here") -} - -// Tertiary returns the tertiary collation weight for ce. -func (ce Elem) Tertiary() uint8 { - if ce&hasTertiaryMask == 0 { - if ce&ceType3or4 == 0 { - return uint8(ce & 0x1F) - } - if ce&ceType4 == ceType4 { - return uint8(ce) - } - return uint8(ce>>24) & 0x1F // type 2 - } else if ce&ceTypeMask == ceType1 { - return defaultTertiary - } - // ce is a quaternary value. 
- return 0 -} - -func (ce Elem) updateTertiary(t uint8) Elem { - if ce&ceTypeMask == ceType1 { - // convert to type 4 - nce := ce & primaryValueMask - nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift - ce = nce - } else if ce&ceTypeMaskExt == ceType3or4 { - ce &= ^Elem(maxTertiary << 24) - return ce | (Elem(t) << 24) - } else { - // type 2 or 4 - ce &= ^Elem(maxTertiary) - } - return ce | Elem(t) -} - -// Quaternary returns the quaternary value if explicitly specified, -// 0 if ce == Ignore, or MaxQuaternary otherwise. -// Quaternary values are used only for shifted variants. -func (ce Elem) Quaternary() int { - if ce&ceTypeMask == ceTypeQ { - return int(ce&primaryValueMask) >> primaryShift - } else if ce&ceIgnoreMask == Ignore { - return 0 - } - return MaxQuaternary -} - -// Weight returns the collation weight for the given level. -func (ce Elem) Weight(l Level) int { - switch l { - case Primary: - return ce.Primary() - case Secondary: - return ce.Secondary() - case Tertiary: - return int(ce.Tertiary()) - case Quaternary: - return ce.Quaternary() - } - return 0 // return 0 (ignore) for undefined levels. -} - -// For contractions, collation elements are of the form -// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where -// - n* is the size of the first node in the contraction trie. -// - i* is the index of the first node in the contraction trie. -// - b* is the offset into the contraction collation element table. -// See contract.go for details on the contraction trie. -const ( - maxNBits = 4 - maxTrieIndexBits = 12 - maxContractOffsetBits = 13 -) - -func splitContractIndex(ce Elem) (index, n, offset int) { - n = int(ce & (1<<maxNBits - 1)) - ce >>= maxNBits - index = int(ce & (1<<maxTrieIndexBits - 1)) - ce >>= maxTrieIndexBits - offset = int(ce & (1<<maxContractOffsetBits - 1)) - return -} - -// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb, -// where b* is the index into the expansion sequence table. 
-const maxExpandIndexBits = 16 - -func splitExpandIndex(ce Elem) (index int) { - return int(uint16(ce)) -} - -// Some runes can be expanded using NFKD decomposition. Instead of storing the full -// sequence of collation elements, we decompose the rune and lookup the collation -// elements for each rune in the decomposition and modify the tertiary weights. -// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where -// - v* is the replacement tertiary weight for the first rune, -// - w* is the replacement tertiary weight for the second rune, -// Tertiary weights of subsequent runes should be replaced with maxTertiary. -// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. -func splitDecompose(ce Elem) (t1, t2 uint8) { - return uint8(ce), uint8(ce >> 8) -} - -const ( - // These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf. - minUnified rune = 0x4E00 - maxUnified = 0x9FFF - minCompatibility = 0xF900 - maxCompatibility = 0xFAFF - minRare = 0x3400 - maxRare = 0x4DBF -) -const ( - commonUnifiedOffset = 0x10000 - rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF - otherOffset = 0x50000 // largest rune in rare is U+2FA1D - illegalOffset = otherOffset + int(unicode.MaxRune) - maxPrimary = illegalOffset + 1 -) - -// implicitPrimary returns the primary weight for the a rune -// for which there is no entry for the rune in the collation table. -// We take a different approach from the one specified in -// http://unicode.org/reports/tr10/#Implicit_Weights, -// but preserve the resulting relative ordering of the runes. -func implicitPrimary(r rune) int { - if unicode.Is(unicode.Ideographic, r) { - if r >= minUnified && r <= maxUnified { - // The most common case for CJK. - return int(r) + commonUnifiedOffset - } - if r >= minCompatibility && r <= maxCompatibility { - // This will typically not hit. 
The DUCET explicitly specifies mappings - // for all characters that do not decompose. - return int(r) + commonUnifiedOffset - } - return int(r) + rareUnifiedOffset - } - return int(r) + otherOffset -} diff --git a/vendor/golang.org/x/text/internal/colltab/colltab.go b/vendor/golang.org/x/text/internal/colltab/colltab.go deleted file mode 100644 index 02f2247..0000000 --- a/vendor/golang.org/x/text/internal/colltab/colltab.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package colltab contains functionality related to collation tables. -// It is only to be used by the collate and search packages. -package colltab // import "golang.org/x/text/internal/colltab" - -import ( - "sort" - - "golang.org/x/text/language" -) - -// MatchLang finds the index of t in tags, using a matching algorithm used for -// collation and search. tags[0] must be language.Und, the remaining tags should -// be sorted alphabetically. -// -// Language matching for collation and search is different from the matching -// defined by language.Matcher: the (inferred) base language must be an exact -// match for the relevant fields. For example, "gsw" should not match "de". -// Also the parent relation is different, as a parent may have a different -// script. So usually the parent of zh-Hant is und, whereas for MatchLang it is -// zh. -func MatchLang(t language.Tag, tags []language.Tag) int { - // Canonicalize the values, including collapsing macro languages. - t, _ = language.All.Canonicalize(t) - - base, conf := t.Base() - // Estimate the base language, but only use high-confidence values. - if conf < language.High { - // The root locale supports "search" and "standard". We assume that any - // implementation will only use one of both. - return 0 - } - - // Maximize base and script and normalize the tag. 
- if _, s, r := t.Raw(); (r != language.Region{}) { - p, _ := language.Raw.Compose(base, s, r) - // Taking the parent forces the script to be maximized. - p = p.Parent() - // Add back region and extensions. - t, _ = language.Raw.Compose(p, r, t.Extensions()) - } else { - // Set the maximized base language. - t, _ = language.Raw.Compose(base, s, t.Extensions()) - } - - // Find start index of the language tag. - start := 1 + sort.Search(len(tags)-1, func(i int) bool { - b, _, _ := tags[i+1].Raw() - return base.String() <= b.String() - }) - if start < len(tags) { - if b, _, _ := tags[start].Raw(); b != base { - return 0 - } - } - - // Besides the base language, script and region, only the collation type and - // the custom variant defined in the 'u' extension are used to distinguish a - // locale. - // Strip all variants and extensions and add back the custom variant. - tdef, _ := language.Raw.Compose(t.Raw()) - tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va")) - - // First search for a specialized collation type, if present. - try := []language.Tag{tdef} - if co := t.TypeForKey("co"); co != "" { - tco, _ := tdef.SetTypeForKey("co", co) - try = []language.Tag{tco, tdef} - } - - for _, tx := range try { - for ; tx != language.Und; tx = parent(tx) { - for i, t := range tags[start:] { - if b, _, _ := t.Raw(); b != base { - break - } - if tx == t { - return start + i - } - } - } - } - return 0 -} - -// parent computes the structural parent. This means inheritance may change -// script. So, unlike the CLDR parent, parent(zh-Hant) == zh. 
-func parent(t language.Tag) language.Tag { - if t.TypeForKey("va") != "" { - t, _ = t.SetTypeForKey("va", "") - return t - } - result := language.Und - if b, s, r := t.Raw(); (r != language.Region{}) { - result, _ = language.Raw.Compose(b, s, t.Extensions()) - } else if (s != language.Script{}) { - result, _ = language.Raw.Compose(b, t.Extensions()) - } else if (b != language.Base{}) { - result, _ = language.Raw.Compose(t.Extensions()) - } - return result -} diff --git a/vendor/golang.org/x/text/internal/colltab/contract.go b/vendor/golang.org/x/text/internal/colltab/contract.go deleted file mode 100644 index 25649d4..0000000 --- a/vendor/golang.org/x/text/internal/colltab/contract.go +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package colltab - -import "unicode/utf8" - -// For a description of ContractTrieSet, see text/collate/build/contract.go. - -type ContractTrieSet []struct{ L, H, N, I uint8 } - -// ctScanner is used to match a trie to an input sequence. -// A contraction may match a non-contiguous sequence of bytes in an input string. -// For example, if there is a contraction for <a, combining_ring>, it should match -// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does -// not block combining_ring. -// ctScanner does not automatically skip over non-blocking non-starters, but rather -// retains the state of the last match and leaves it up to the user to continue -// the match at the appropriate points. 
-type ctScanner struct { - states ContractTrieSet - s []byte - n int - index int - pindex int - done bool -} - -type ctScannerString struct { - states ContractTrieSet - s string - n int - index int - pindex int - done bool -} - -func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner { - return ctScanner{s: b, states: t[index:], n: n} -} - -func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString { - return ctScannerString{s: str, states: t[index:], n: n} -} - -// result returns the offset i and bytes consumed p so far. If no suffix -// matched, i and p will be 0. -func (s *ctScanner) result() (i, p int) { - return s.index, s.pindex -} - -func (s *ctScannerString) result() (i, p int) { - return s.index, s.pindex -} - -const ( - final = 0 - noIndex = 0xFF -) - -// scan matches the longest suffix at the current location in the input -// and returns the number of bytes consumed. -func (s *ctScanner) scan(p int) int { - pr := p // the p at the rune start - str := s.s - states, n := s.states, s.n - for i := 0; i < n && p < len(str); { - e := states[i] - c := str[p] - // TODO: a significant number of contractions are of a form that - // cannot match discontiguous UTF-8 in a normalized string. We could let - // a negative value of e.n mean that we can set s.done = true and avoid - // the need for additional matches. - if c >= e.L { - if e.L == c { - p++ - if e.I != noIndex { - s.index = int(e.I) - s.pindex = p - } - if e.N != final { - i, states, n = 0, states[int(e.H)+n:], int(e.N) - if p >= len(str) || utf8.RuneStart(str[p]) { - s.states, s.n, pr = states, n, p - } - } else { - s.done = true - return p - } - continue - } else if e.N == final && c <= e.H { - p++ - s.done = true - s.index = int(c-e.L) + int(e.I) - s.pindex = p - return p - } - } - i++ - } - return pr -} - -// scan is a verbatim copy of ctScanner.scan. 
-func (s *ctScannerString) scan(p int) int { - pr := p // the p at the rune start - str := s.s - states, n := s.states, s.n - for i := 0; i < n && p < len(str); { - e := states[i] - c := str[p] - // TODO: a significant number of contractions are of a form that - // cannot match discontiguous UTF-8 in a normalized string. We could let - // a negative value of e.n mean that we can set s.done = true and avoid - // the need for additional matches. - if c >= e.L { - if e.L == c { - p++ - if e.I != noIndex { - s.index = int(e.I) - s.pindex = p - } - if e.N != final { - i, states, n = 0, states[int(e.H)+n:], int(e.N) - if p >= len(str) || utf8.RuneStart(str[p]) { - s.states, s.n, pr = states, n, p - } - } else { - s.done = true - return p - } - continue - } else if e.N == final && c <= e.H { - p++ - s.done = true - s.index = int(c-e.L) + int(e.I) - s.pindex = p - return p - } - } - i++ - } - return pr -} diff --git a/vendor/golang.org/x/text/internal/colltab/iter.go b/vendor/golang.org/x/text/internal/colltab/iter.go deleted file mode 100644 index c1b1ba8..0000000 --- a/vendor/golang.org/x/text/internal/colltab/iter.go +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package colltab - -// An Iter incrementally converts chunks of the input text to collation -// elements, while ensuring that the collation elements are in normalized order -// (that is, they are in the order as if the input text were normalized first). -type Iter struct { - Weighter Weighter - Elems []Elem - // N is the number of elements in Elems that will not be reordered on - // subsequent iterations, N <= len(Elems). 
- N int - - bytes []byte - str string - // Because the Elems buffer may contain collation elements that are needed - // for look-ahead, we need two positions in the text (bytes or str): one for - // the end position in the text for the current iteration and one for the - // start of the next call to appendNext. - pEnd int // end position in text corresponding to N. - pNext int // pEnd <= pNext. -} - -// Reset sets the position in the current input text to p and discards any -// results obtained so far. -func (i *Iter) Reset(p int) { - i.Elems = i.Elems[:0] - i.N = 0 - i.pEnd = p - i.pNext = p -} - -// Len returns the length of the input text. -func (i *Iter) Len() int { - if i.bytes != nil { - return len(i.bytes) - } - return len(i.str) -} - -// Discard removes the collation elements up to N. -func (i *Iter) Discard() { - // TODO: change this such that only modifiers following starters will have - // to be copied. - i.Elems = i.Elems[:copy(i.Elems, i.Elems[i.N:])] - i.N = 0 -} - -// End returns the end position of the input text for which Next has returned -// results. -func (i *Iter) End() int { - return i.pEnd -} - -// SetInput resets i to input s. -func (i *Iter) SetInput(s []byte) { - i.bytes = s - i.str = "" - i.Reset(0) -} - -// SetInputString resets i to input s. -func (i *Iter) SetInputString(s string) { - i.str = s - i.bytes = nil - i.Reset(0) -} - -func (i *Iter) done() bool { - return i.pNext >= len(i.str) && i.pNext >= len(i.bytes) -} - -func (i *Iter) appendNext() bool { - if i.done() { - return false - } - var sz int - if i.bytes == nil { - i.Elems, sz = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:]) - } else { - i.Elems, sz = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:]) - } - if sz == 0 { - sz = 1 - } - i.pNext += sz - return true -} - -// Next appends Elems to the internal array. On each iteration, it will either -// add starters or modifiers. In the majority of cases, an Elem with a primary -// value > 0 will have a CCC of 0. 
The CCC values of collation elements are also -// used to detect if the input string was not normalized and to adjust the -// result accordingly. -func (i *Iter) Next() bool { - if i.N == len(i.Elems) && !i.appendNext() { - return false - } - - // Check if the current segment starts with a starter. - prevCCC := i.Elems[len(i.Elems)-1].CCC() - if prevCCC == 0 { - i.N = len(i.Elems) - i.pEnd = i.pNext - return true - } else if i.Elems[i.N].CCC() == 0 { - // set i.N to only cover part of i.Elems for which prevCCC == 0 and - // use rest for the next call to next. - for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ { - } - i.pEnd = i.pNext - return true - } - - // The current (partial) segment starts with modifiers. We need to collect - // all successive modifiers to ensure that they are normalized. - for { - p := len(i.Elems) - i.pEnd = i.pNext - if !i.appendNext() { - break - } - - if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters { - // Leave the starter for the next iteration. This ensures that we - // do not return sequences of collation elements that cross two - // segments. - // - // TODO: handle large number of combining characters by fully - // normalizing the input segment before iteration. This ensures - // results are consistent across the text repo. - i.N = p - return true - } else if ccc < prevCCC { - i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC. - } else { - prevCCC = ccc - } - } - - done := len(i.Elems) != i.N - i.N = len(i.Elems) - return done -} - -// nextNoNorm is the same as next, but does not "normalize" the collation -// elements. -func (i *Iter) nextNoNorm() bool { - // TODO: remove this function. Using this instead of next does not seem - // to improve performance in any significant way. We retain this until - // later for evaluation purposes. 
- if i.done() { - return false - } - i.appendNext() - i.N = len(i.Elems) - return true -} - -const maxCombiningCharacters = 30 - -// doNorm reorders the collation elements in i.Elems. -// It assumes that blocks of collation elements added with appendNext -// either start and end with the same CCC or start with CCC == 0. -// This allows for a single insertion point for the entire block. -// The correctness of this assumption is verified in builder.go. -func (i *Iter) doNorm(p int, ccc uint8) { - n := len(i.Elems) - k := p - for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- { - } - i.Elems = append(i.Elems, i.Elems[p:k]...) - copy(i.Elems[p:], i.Elems[k:]) - i.Elems = i.Elems[:n] -} diff --git a/vendor/golang.org/x/text/internal/colltab/numeric.go b/vendor/golang.org/x/text/internal/colltab/numeric.go deleted file mode 100644 index 38c255c..0000000 --- a/vendor/golang.org/x/text/internal/colltab/numeric.go +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package colltab - -import ( - "unicode" - "unicode/utf8" -) - -// NewNumericWeighter wraps w to replace individual digits to sort based on their -// numeric value. -// -// Weighter w must have a free primary weight after the primary weight for 9. -// If this is not the case, numeric value will sort at the same primary level -// as the first primary sorting after 9. -func NewNumericWeighter(w Weighter) Weighter { - getElem := func(s string) Elem { - elems, _ := w.AppendNextString(nil, s) - return elems[0] - } - nine := getElem("9") - - // Numbers should order before zero, but the DUCET has no room for this. - // TODO: move before zero once we use fractional collation elements. 
- ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0) - - return &numericWeighter{ - Weighter: w, - - // We assume that w sorts digits of different kinds in order of numeric - // value and that the tertiary weight order is preserved. - // - // TODO: evaluate whether it is worth basing the ranges on the Elem - // encoding itself once the move to fractional weights is complete. - zero: getElem("0"), - zeroSpecialLo: getElem("0"), // U+FF10 FULLWIDTH DIGIT ZERO - zeroSpecialHi: getElem("₀"), // U+2080 SUBSCRIPT ZERO - nine: nine, - nineSpecialHi: getElem("₉"), // U+2089 SUBSCRIPT NINE - numberStart: ns, - } -} - -// A numericWeighter translates a stream of digits into a stream of weights -// representing the numeric value. -type numericWeighter struct { - Weighter - - // The Elems below all demarcate boundaries of specific ranges. With the - // current element encoding digits are in two ranges: normal (default - // tertiary value) and special. For most languages, digits have collation - // elements in the normal range. - // - // Note: the range tests are very specific for the element encoding used by - // this implementation. The tests in collate_test.go are designed to fail - // if this code is not updated when an encoding has changed. - - zero Elem // normal digit zero - zeroSpecialLo Elem // special digit zero, low tertiary value - zeroSpecialHi Elem // special digit zero, high tertiary value - nine Elem // normal digit nine - nineSpecialHi Elem // special digit nine - numberStart Elem -} - -// AppendNext calls the namesake of the underlying weigher, but replaces single -// digits with weights representing their value. -func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) { - ce, n = nw.Weighter.AppendNext(buf, s) - nc := numberConverter{ - elems: buf, - w: nw, - b: s, - } - isZero, ok := nc.checkNextDigit(ce) - if !ok { - return ce, n - } - // ce might have been grown already, so take it instead of buf. 
- nc.init(ce, len(buf), isZero) - for n < len(s) { - ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:]) - nc.b = s - n += sz - if !nc.update(ce) { - break - } - } - return nc.result(), n -} - -// AppendNextString calls the namesake of the underlying weigher, but replaces -// single digits with weights representing their value. -func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) { - ce, n = nw.Weighter.AppendNextString(buf, s) - nc := numberConverter{ - elems: buf, - w: nw, - s: s, - } - isZero, ok := nc.checkNextDigit(ce) - if !ok { - return ce, n - } - nc.init(ce, len(buf), isZero) - for n < len(s) { - ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:]) - nc.s = s - n += sz - if !nc.update(ce) { - break - } - } - return nc.result(), n -} - -type numberConverter struct { - w *numericWeighter - - elems []Elem - nDigits int - lenIndex int - - s string // set if the input was of type string - b []byte // set if the input was of type []byte -} - -// init completes initialization of a numberConverter and prepares it for adding -// more digits. elems is assumed to have a digit starting at oldLen. -func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) { - // Insert a marker indicating the start of a number and and a placeholder - // for the number of digits. - if isZero { - elems = append(elems[:oldLen], nc.w.numberStart, 0) - } else { - elems = append(elems, 0, 0) - copy(elems[oldLen+2:], elems[oldLen:]) - elems[oldLen] = nc.w.numberStart - elems[oldLen+1] = 0 - - nc.nDigits = 1 - } - nc.elems = elems - nc.lenIndex = oldLen + 1 -} - -// checkNextDigit reports whether bufNew adds a single digit relative to the old -// buffer. If it does, it also reports whether this digit is zero. -func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) { - if len(nc.elems) >= len(bufNew) { - return false, false - } - e := bufNew[len(nc.elems)] - if e < nc.w.zeroSpecialLo || nc.w.nine < e { - // Not a number. 
- return false, false - } - if e < nc.w.zero { - if e > nc.w.nineSpecialHi { - // Not a number. - return false, false - } - if !nc.isDigit() { - return false, false - } - isZero = e <= nc.w.zeroSpecialHi - } else { - // This is the common case if we encounter a digit. - isZero = e == nc.w.zero - } - // Test the remaining added collation elements have a zero primary value. - if n := len(bufNew) - len(nc.elems); n > 1 { - for i := len(nc.elems) + 1; i < len(bufNew); i++ { - if bufNew[i].Primary() != 0 { - return false, false - } - } - // In some rare cases, collation elements will encode runes in - // unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371) - // are not in Nd. Also some digits that clearly belong in unicode.No, - // like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have - // collation elements indistinguishable from normal digits. - // Unfortunately, this means we need to make this check for nearly all - // non-Latin digits. - // - // TODO: check the performance impact and find something better if it is - // an issue. - if !nc.isDigit() { - return false, false - } - } - return isZero, true -} - -func (nc *numberConverter) isDigit() bool { - if nc.b != nil { - r, _ := utf8.DecodeRune(nc.b) - return unicode.In(r, unicode.Nd) - } - r, _ := utf8.DecodeRuneInString(nc.s) - return unicode.In(r, unicode.Nd) -} - -// We currently support a maximum of about 2M digits (the number of primary -// values). Such numbers will compare correctly against small numbers, but their -// comparison against other large numbers is undefined. -// -// TODO: define a proper fallback, such as comparing large numbers textually or -// actually allowing numbers of unlimited length. -// -// TODO: cap this to a lower number (like 100) and maybe allow a larger number -// in an option? 
-const maxDigits = 1<<maxPrimaryBits - 1 - -func (nc *numberConverter) update(elems []Elem) bool { - isZero, ok := nc.checkNextDigit(elems) - if nc.nDigits == 0 && isZero { - return true - } - nc.elems = elems - if !ok { - return false - } - nc.nDigits++ - return nc.nDigits < maxDigits -} - -// result fills in the length element for the digit sequence and returns the -// completed collation elements. -func (nc *numberConverter) result() []Elem { - e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0) - nc.elems[nc.lenIndex] = e - return nc.elems -} diff --git a/vendor/golang.org/x/text/internal/colltab/table.go b/vendor/golang.org/x/text/internal/colltab/table.go deleted file mode 100644 index e26e36d..0000000 --- a/vendor/golang.org/x/text/internal/colltab/table.go +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package colltab - -import ( - "unicode/utf8" - - "golang.org/x/text/unicode/norm" -) - -// Table holds all collation data for a given collation ordering. 
-type Table struct { - Index Trie // main trie - - // expansion info - ExpandElem []uint32 - - // contraction info - ContractTries ContractTrieSet - ContractElem []uint32 - MaxContractLen int - VariableTop uint32 -} - -func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) { - return t.appendNext(w, source{bytes: b}) -} - -func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) { - return t.appendNext(w, source{str: s}) -} - -func (t *Table) Start(p int, b []byte) int { - // TODO: implement - panic("not implemented") -} - -func (t *Table) StartString(p int, s string) int { - // TODO: implement - panic("not implemented") -} - -func (t *Table) Domain() []string { - // TODO: implement - panic("not implemented") -} - -func (t *Table) Top() uint32 { - return t.VariableTop -} - -type source struct { - str string - bytes []byte -} - -func (src *source) lookup(t *Table) (ce Elem, sz int) { - if src.bytes == nil { - return t.Index.lookupString(src.str) - } - return t.Index.lookup(src.bytes) -} - -func (src *source) tail(sz int) { - if src.bytes == nil { - src.str = src.str[sz:] - } else { - src.bytes = src.bytes[sz:] - } -} - -func (src *source) nfd(buf []byte, end int) []byte { - if src.bytes == nil { - return norm.NFD.AppendString(buf[:0], src.str[:end]) - } - return norm.NFD.Append(buf[:0], src.bytes[:end]...) -} - -func (src *source) rune() (r rune, sz int) { - if src.bytes == nil { - return utf8.DecodeRuneInString(src.str) - } - return utf8.DecodeRune(src.bytes) -} - -func (src *source) properties(f norm.Form) norm.Properties { - if src.bytes == nil { - return f.PropertiesString(src.str) - } - return f.Properties(src.bytes) -} - -// appendNext appends the weights corresponding to the next rune or -// contraction in s. If a contraction is matched to a discontinuous -// sequence of runes, the weights for the interstitial runes are -// appended as well. 
It returns a new slice that includes the appended -// weights and the number of bytes consumed from s. -func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) { - ce, sz := src.lookup(t) - tp := ce.ctype() - if tp == ceNormal { - if ce == 0 { - r, _ := src.rune() - const ( - hangulSize = 3 - firstHangul = 0xAC00 - lastHangul = 0xD7A3 - ) - if r >= firstHangul && r <= lastHangul { - // TODO: performance can be considerably improved here. - n = sz - var buf [16]byte // Used for decomposing Hangul. - for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] { - ce, sz = t.Index.lookup(b) - w = append(w, ce) - } - return w, n - } - ce = makeImplicitCE(implicitPrimary(r)) - } - w = append(w, ce) - } else if tp == ceExpansionIndex { - w = t.appendExpansion(w, ce) - } else if tp == ceContractionIndex { - n := 0 - src.tail(sz) - if src.bytes == nil { - w, n = t.matchContractionString(w, ce, src.str) - } else { - w, n = t.matchContraction(w, ce, src.bytes) - } - sz += n - } else if tp == ceDecompose { - // Decompose using NFKD and replace tertiary weights. 
- t1, t2 := splitDecompose(ce) - i := len(w) - nfkd := src.properties(norm.NFKD).Decomposition() - for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] { - w, p = t.appendNext(w, source{bytes: nfkd}) - } - w[i] = w[i].updateTertiary(t1) - if i++; i < len(w) { - w[i] = w[i].updateTertiary(t2) - for i++; i < len(w); i++ { - w[i] = w[i].updateTertiary(maxTertiary) - } - } - } - return w, sz -} - -func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem { - i := splitExpandIndex(ce) - n := int(t.ExpandElem[i]) - i++ - for _, ce := range t.ExpandElem[i : i+n] { - w = append(w, Elem(ce)) - } - return w -} - -func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) { - index, n, offset := splitContractIndex(ce) - - scan := t.ContractTries.scanner(index, n, suffix) - buf := [norm.MaxSegmentSize]byte{} - bufp := 0 - p := scan.scan(0) - - if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf { - // By now we should have filtered most cases. - p0 := p - bufn := 0 - rune := norm.NFD.Properties(suffix[p:]) - p += rune.Size() - if rune.LeadCCC() != 0 { - prevCC := rune.TrailCCC() - // A gap may only occur in the last normalization segment. - // This also ensures that len(scan.s) < norm.MaxSegmentSize. - if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 { - scan.s = suffix[:p+end] - } - for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf { - rune = norm.NFD.Properties(suffix[p:]) - if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc { - break - } - prevCC = rune.TrailCCC() - if pp := scan.scan(p); pp != p { - // Copy the interstitial runes for later processing. - bufn += copy(buf[bufn:], suffix[p0:p]) - if scan.pindex == pp { - bufp = bufn - } - p, p0 = pp, pp - } else { - p += rune.Size() - } - } - } - } - // Append weights for the matched contraction, which may be an expansion. 
- i, n := scan.result() - ce = Elem(t.ContractElem[i+offset]) - if ce.ctype() == ceNormal { - w = append(w, ce) - } else { - w = t.appendExpansion(w, ce) - } - // Append weights for the runes in the segment not part of the contraction. - for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] { - w, p = t.appendNext(w, source{bytes: b}) - } - return w, n -} - -// TODO: unify the two implementations. This is best done after first simplifying -// the algorithm taking into account the inclusion of both NFC and NFD forms -// in the table. -func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) { - index, n, offset := splitContractIndex(ce) - - scan := t.ContractTries.scannerString(index, n, suffix) - buf := [norm.MaxSegmentSize]byte{} - bufp := 0 - p := scan.scan(0) - - if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf { - // By now we should have filtered most cases. - p0 := p - bufn := 0 - rune := norm.NFD.PropertiesString(suffix[p:]) - p += rune.Size() - if rune.LeadCCC() != 0 { - prevCC := rune.TrailCCC() - // A gap may only occur in the last normalization segment. - // This also ensures that len(scan.s) < norm.MaxSegmentSize. - if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 { - scan.s = suffix[:p+end] - } - for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf { - rune = norm.NFD.PropertiesString(suffix[p:]) - if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc { - break - } - prevCC = rune.TrailCCC() - if pp := scan.scan(p); pp != p { - // Copy the interstitial runes for later processing. - bufn += copy(buf[bufn:], suffix[p0:p]) - if scan.pindex == pp { - bufp = bufn - } - p, p0 = pp, pp - } else { - p += rune.Size() - } - } - } - } - // Append weights for the matched contraction, which may be an expansion. 
- i, n := scan.result() - ce = Elem(t.ContractElem[i+offset]) - if ce.ctype() == ceNormal { - w = append(w, ce) - } else { - w = t.appendExpansion(w, ce) - } - // Append weights for the runes in the segment not part of the contraction. - for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] { - w, p = t.appendNext(w, source{bytes: b}) - } - return w, n -} diff --git a/vendor/golang.org/x/text/internal/colltab/trie.go b/vendor/golang.org/x/text/internal/colltab/trie.go deleted file mode 100644 index a0eaa0d..0000000 --- a/vendor/golang.org/x/text/internal/colltab/trie.go +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// The trie in this file is used to associate the first full character in an -// UTF-8 string to a collation element. All but the last byte in a UTF-8 byte -// sequence are used to lookup offsets in the index table to be used for the -// next byte. The last byte is used to index into a table of collation elements. -// For a full description, see go.text/collate/build/trie.go. - -package colltab - -const blockSize = 64 - -type Trie struct { - Index0 []uint16 // index for first byte (0xC0-0xFF) - Values0 []uint32 // index for first byte (0x00-0x7F) - Index []uint16 - Values []uint32 -} - -const ( - t1 = 0x00 // 0000 0000 - tx = 0x80 // 1000 0000 - t2 = 0xC0 // 1100 0000 - t3 = 0xE0 // 1110 0000 - t4 = 0xF0 // 1111 0000 - t5 = 0xF8 // 1111 1000 - t6 = 0xFC // 1111 1100 - te = 0xFE // 1111 1110 -) - -func (t *Trie) lookupValue(n uint16, b byte) Elem { - return Elem(t.Values[int(n)<<6+int(b)]) -} - -// lookup returns the trie value for the first UTF-8 encoding in s and -// the width in bytes of this encoding. The size will be 0 if s does not -// hold enough bytes to complete the encoding. len(s) must be greater than 0. 
-func (t *Trie) lookup(s []byte) (v Elem, sz int) { - c0 := s[0] - switch { - case c0 < tx: - return Elem(t.Values0[c0]), 1 - case c0 < t2: - return 0, 1 - case c0 < t3: - if len(s) < 2 { - return 0, 0 - } - i := t.Index0[c0] - c1 := s[1] - if c1 < tx || t2 <= c1 { - return 0, 1 - } - return t.lookupValue(i, c1), 2 - case c0 < t4: - if len(s) < 3 { - return 0, 0 - } - i := t.Index0[c0] - c1 := s[1] - if c1 < tx || t2 <= c1 { - return 0, 1 - } - o := int(i)<<6 + int(c1) - i = t.Index[o] - c2 := s[2] - if c2 < tx || t2 <= c2 { - return 0, 2 - } - return t.lookupValue(i, c2), 3 - case c0 < t5: - if len(s) < 4 { - return 0, 0 - } - i := t.Index0[c0] - c1 := s[1] - if c1 < tx || t2 <= c1 { - return 0, 1 - } - o := int(i)<<6 + int(c1) - i = t.Index[o] - c2 := s[2] - if c2 < tx || t2 <= c2 { - return 0, 2 - } - o = int(i)<<6 + int(c2) - i = t.Index[o] - c3 := s[3] - if c3 < tx || t2 <= c3 { - return 0, 3 - } - return t.lookupValue(i, c3), 4 - } - // Illegal rune - return 0, 1 -} - -// The body of lookupString is a verbatim copy of that of lookup. 
-func (t *Trie) lookupString(s string) (v Elem, sz int) { - c0 := s[0] - switch { - case c0 < tx: - return Elem(t.Values0[c0]), 1 - case c0 < t2: - return 0, 1 - case c0 < t3: - if len(s) < 2 { - return 0, 0 - } - i := t.Index0[c0] - c1 := s[1] - if c1 < tx || t2 <= c1 { - return 0, 1 - } - return t.lookupValue(i, c1), 2 - case c0 < t4: - if len(s) < 3 { - return 0, 0 - } - i := t.Index0[c0] - c1 := s[1] - if c1 < tx || t2 <= c1 { - return 0, 1 - } - o := int(i)<<6 + int(c1) - i = t.Index[o] - c2 := s[2] - if c2 < tx || t2 <= c2 { - return 0, 2 - } - return t.lookupValue(i, c2), 3 - case c0 < t5: - if len(s) < 4 { - return 0, 0 - } - i := t.Index0[c0] - c1 := s[1] - if c1 < tx || t2 <= c1 { - return 0, 1 - } - o := int(i)<<6 + int(c1) - i = t.Index[o] - c2 := s[2] - if c2 < tx || t2 <= c2 { - return 0, 2 - } - o = int(i)<<6 + int(c2) - i = t.Index[o] - c3 := s[3] - if c3 < tx || t2 <= c3 { - return 0, 3 - } - return t.lookupValue(i, c3), 4 - } - // Illegal rune - return 0, 1 -} diff --git a/vendor/golang.org/x/text/internal/colltab/weighter.go b/vendor/golang.org/x/text/internal/colltab/weighter.go deleted file mode 100644 index f1ec45f..0000000 --- a/vendor/golang.org/x/text/internal/colltab/weighter.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package colltab // import "golang.org/x/text/internal/colltab" - -// A Weighter can be used as a source for Collator and Searcher. -type Weighter interface { - // Start finds the start of the segment that includes position p. - Start(p int, b []byte) int - - // StartString finds the start of the segment that includes position p. - StartString(p int, s string) int - - // AppendNext appends Elems to buf corresponding to the longest match - // of a single character or contraction from the start of s. - // It returns the new buf and the number of bytes consumed. 
- AppendNext(buf []Elem, s []byte) (ce []Elem, n int) - - // AppendNextString appends Elems to buf corresponding to the longest match - // of a single character or contraction from the start of s. - // It returns the new buf and the number of bytes consumed. - AppendNextString(buf []Elem, s string) (ce []Elem, n int) - - // Domain returns a slice of all single characters and contractions for which - // collation elements are defined in this table. - Domain() []string - - // Top returns the highest variable primary value. - Top() uint32 -} diff --git a/vendor/golang.org/x/text/internal/gen/code.go b/vendor/golang.org/x/text/internal/gen/code.go deleted file mode 100644 index d7031b6..0000000 --- a/vendor/golang.org/x/text/internal/gen/code.go +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package gen - -import ( - "bytes" - "encoding/gob" - "fmt" - "hash" - "hash/fnv" - "io" - "log" - "os" - "reflect" - "strings" - "unicode" - "unicode/utf8" -) - -// This file contains utilities for generating code. - -// TODO: other write methods like: -// - slices, maps, types, etc. - -// CodeWriter is a utility for writing structured code. It computes the content -// hash and size of written content. It ensures there are newlines between -// written code blocks. -type CodeWriter struct { - buf bytes.Buffer - Size int - Hash hash.Hash32 // content hash - gob *gob.Encoder - // For comments we skip the usual one-line separator if they are followed by - // a code block. - skipSep bool -} - -func (w *CodeWriter) Write(p []byte) (n int, err error) { - return w.buf.Write(p) -} - -// NewCodeWriter returns a new CodeWriter. 
-func NewCodeWriter() *CodeWriter { - h := fnv.New32() - return &CodeWriter{Hash: h, gob: gob.NewEncoder(h)} -} - -// WriteGoFile appends the buffer with the total size of all created structures -// and writes it as a Go file to the the given file with the given package name. -func (w *CodeWriter) WriteGoFile(filename, pkg string) { - f, err := os.Create(filename) - if err != nil { - log.Fatalf("Could not create file %s: %v", filename, err) - } - defer f.Close() - if _, err = w.WriteGo(f, pkg); err != nil { - log.Fatalf("Error writing file %s: %v", filename, err) - } -} - -// WriteGo appends the buffer with the total size of all created structures and -// writes it as a Go file to the the given writer with the given package name. -func (w *CodeWriter) WriteGo(out io.Writer, pkg string) (n int, err error) { - sz := w.Size - w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32()) - defer w.buf.Reset() - return WriteGo(out, pkg, w.buf.Bytes()) -} - -func (w *CodeWriter) printf(f string, x ...interface{}) { - fmt.Fprintf(w, f, x...) -} - -func (w *CodeWriter) insertSep() { - if w.skipSep { - w.skipSep = false - return - } - // Use at least two newlines to ensure a blank space between the previous - // block. WriteGoFile will remove extraneous newlines. - w.printf("\n\n") -} - -// WriteComment writes a comment block. All line starts are prefixed with "//". -// Initial empty lines are gobbled. The indentation for the first line is -// stripped from consecutive lines. -func (w *CodeWriter) WriteComment(comment string, args ...interface{}) { - s := fmt.Sprintf(comment, args...) - s = strings.Trim(s, "\n") - - // Use at least two newlines to ensure a blank space between the previous - // block. WriteGoFile will remove extraneous newlines. - w.printf("\n\n// ") - w.skipSep = true - - // strip first indent level. 
- sep := "\n" - for ; len(s) > 0 && (s[0] == '\t' || s[0] == ' '); s = s[1:] { - sep += s[:1] - } - - strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, s) - - w.printf("\n") -} - -func (w *CodeWriter) writeSizeInfo(size int) { - w.printf("// Size: %d bytes\n", size) -} - -// WriteConst writes a constant of the given name and value. -func (w *CodeWriter) WriteConst(name string, x interface{}) { - w.insertSep() - v := reflect.ValueOf(x) - - switch v.Type().Kind() { - case reflect.String: - w.printf("const %s %s = ", name, typeName(x)) - w.WriteString(v.String()) - w.printf("\n") - default: - w.printf("const %s = %#v\n", name, x) - } -} - -// WriteVar writes a variable of the given name and value. -func (w *CodeWriter) WriteVar(name string, x interface{}) { - w.insertSep() - v := reflect.ValueOf(x) - oldSize := w.Size - sz := int(v.Type().Size()) - w.Size += sz - - switch v.Type().Kind() { - case reflect.String: - w.printf("var %s %s = ", name, typeName(x)) - w.WriteString(v.String()) - case reflect.Struct: - w.gob.Encode(x) - fallthrough - case reflect.Slice, reflect.Array: - w.printf("var %s = ", name) - w.writeValue(v) - w.writeSizeInfo(w.Size - oldSize) - default: - w.printf("var %s %s = ", name, typeName(x)) - w.gob.Encode(x) - w.writeValue(v) - w.writeSizeInfo(w.Size - oldSize) - } - w.printf("\n") -} - -func (w *CodeWriter) writeValue(v reflect.Value) { - x := v.Interface() - switch v.Kind() { - case reflect.String: - w.WriteString(v.String()) - case reflect.Array: - // Don't double count: callers of WriteArray count on the size being - // added, so we need to discount it here. 
- w.Size -= int(v.Type().Size()) - w.writeSlice(x, true) - case reflect.Slice: - w.writeSlice(x, false) - case reflect.Struct: - w.printf("%s{\n", typeName(v.Interface())) - t := v.Type() - for i := 0; i < v.NumField(); i++ { - w.printf("%s: ", t.Field(i).Name) - w.writeValue(v.Field(i)) - w.printf(",\n") - } - w.printf("}") - default: - w.printf("%#v", x) - } -} - -// WriteString writes a string literal. -func (w *CodeWriter) WriteString(s string) { - s = strings.Replace(s, `\`, `\\`, -1) - io.WriteString(w.Hash, s) // content hash - w.Size += len(s) - - const maxInline = 40 - if len(s) <= maxInline { - w.printf("%q", s) - return - } - - // We will render the string as a multi-line string. - const maxWidth = 80 - 4 - len(`"`) - len(`" +`) - - // When starting on its own line, go fmt indents line 2+ an extra level. - n, max := maxWidth, maxWidth-4 - - // As per https://golang.org/issue/18078, the compiler has trouble - // compiling the concatenation of many strings, s0 + s1 + s2 + ... + sN, - // for large N. We insert redundant, explicit parentheses to work around - // that, lowering the N at any given step: (s0 + s1 + ... + s63) + (s64 + - // ... + s127) + etc + (etc + ... + sN). - explicitParens, extraComment := len(s) > 128*1024, "" - if explicitParens { - w.printf(`(`) - extraComment = "; the redundant, explicit parens are for https://golang.org/issue/18078" - } - - // Print "" +\n, if a string does not start on its own line. 
- b := w.buf.Bytes() - if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' { - w.printf("\"\" + // Size: %d bytes%s\n", len(s), extraComment) - n, max = maxWidth, maxWidth - } - - w.printf(`"`) - - for sz, p, nLines := 0, 0, 0; p < len(s); { - var r rune - r, sz = utf8.DecodeRuneInString(s[p:]) - out := s[p : p+sz] - chars := 1 - if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' { - switch sz { - case 1: - out = fmt.Sprintf("\\x%02x", s[p]) - case 2, 3: - out = fmt.Sprintf("\\u%04x", r) - case 4: - out = fmt.Sprintf("\\U%08x", r) - } - chars = len(out) - } - if n -= chars; n < 0 { - nLines++ - if explicitParens && nLines&63 == 63 { - w.printf("\") + (\"") - } - w.printf("\" +\n\"") - n = max - len(out) - } - w.printf("%s", out) - p += sz - } - w.printf(`"`) - if explicitParens { - w.printf(`)`) - } -} - -// WriteSlice writes a slice value. -func (w *CodeWriter) WriteSlice(x interface{}) { - w.writeSlice(x, false) -} - -// WriteArray writes an array value. -func (w *CodeWriter) WriteArray(x interface{}) { - w.writeSlice(x, true) -} - -func (w *CodeWriter) writeSlice(x interface{}, isArray bool) { - v := reflect.ValueOf(x) - w.gob.Encode(v.Len()) - w.Size += v.Len() * int(v.Type().Elem().Size()) - name := typeName(x) - if isArray { - name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:]) - } - if isArray { - w.printf("%s{\n", name) - } else { - w.printf("%s{ // %d elements\n", name, v.Len()) - } - - switch kind := v.Type().Elem().Kind(); kind { - case reflect.String: - for _, s := range x.([]string) { - w.WriteString(s) - w.printf(",\n") - } - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - // nLine and nBlock are the number of elements per line and block. 
- nLine, nBlock, format := 8, 64, "%d," - switch kind { - case reflect.Uint8: - format = "%#02x," - case reflect.Uint16: - format = "%#04x," - case reflect.Uint32: - nLine, nBlock, format = 4, 32, "%#08x," - case reflect.Uint, reflect.Uint64: - nLine, nBlock, format = 4, 32, "%#016x," - case reflect.Int8: - nLine = 16 - } - n := nLine - for i := 0; i < v.Len(); i++ { - if i%nBlock == 0 && v.Len() > nBlock { - w.printf("// Entry %X - %X\n", i, i+nBlock-1) - } - x := v.Index(i).Interface() - w.gob.Encode(x) - w.printf(format, x) - if n--; n == 0 { - n = nLine - w.printf("\n") - } - } - w.printf("\n") - case reflect.Struct: - zero := reflect.Zero(v.Type().Elem()).Interface() - for i := 0; i < v.Len(); i++ { - x := v.Index(i).Interface() - w.gob.EncodeValue(v) - if !reflect.DeepEqual(zero, x) { - line := fmt.Sprintf("%#v,\n", x) - line = line[strings.IndexByte(line, '{'):] - w.printf("%d: ", i) - w.printf(line) - } - } - case reflect.Array: - for i := 0; i < v.Len(); i++ { - w.printf("%d: %#v,\n", i, v.Index(i).Interface()) - } - default: - panic("gen: slice elem type not supported") - } - w.printf("}") -} - -// WriteType writes a definition of the type of the given value and returns the -// type name. -func (w *CodeWriter) WriteType(x interface{}) string { - t := reflect.TypeOf(x) - w.printf("type %s struct {\n", t.Name()) - for i := 0; i < t.NumField(); i++ { - w.printf("\t%s %s\n", t.Field(i).Name, t.Field(i).Type) - } - w.printf("}\n") - return t.Name() -} - -// typeName returns the name of the go type of x. -func typeName(x interface{}) string { - t := reflect.ValueOf(x).Type() - return strings.Replace(fmt.Sprint(t), "main.", "", 1) -} diff --git a/vendor/golang.org/x/text/internal/gen/gen.go b/vendor/golang.org/x/text/internal/gen/gen.go deleted file mode 100644 index 2acb035..0000000 --- a/vendor/golang.org/x/text/internal/gen/gen.go +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package gen contains common code for the various code generation tools in the -// text repository. Its usage ensures consistency between tools. -// -// This package defines command line flags that are common to most generation -// tools. The flags allow for specifying specific Unicode and CLDR versions -// in the public Unicode data repository (http://www.unicode.org/Public). -// -// A local Unicode data mirror can be set through the flag -local or the -// environment variable UNICODE_DIR. The former takes precedence. The local -// directory should follow the same structure as the public repository. -// -// IANA data can also optionally be mirrored by putting it in the iana directory -// rooted at the top of the local mirror. Beware, though, that IANA data is not -// versioned. So it is up to the developer to use the right version. -package gen // import "golang.org/x/text/internal/gen" - -import ( - "bytes" - "flag" - "fmt" - "go/build" - "go/format" - "io" - "io/ioutil" - "log" - "net/http" - "os" - "path" - "path/filepath" - "sync" - "unicode" - - "golang.org/x/text/unicode/cldr" -) - -var ( - url = flag.String("url", - "http://www.unicode.org/Public", - "URL of Unicode database directory") - iana = flag.String("iana", - "http://www.iana.org", - "URL of the IANA repository") - unicodeVersion = flag.String("unicode", - getEnv("UNICODE_VERSION", unicode.Version), - "unicode version to use") - cldrVersion = flag.String("cldr", - getEnv("CLDR_VERSION", cldr.Version), - "cldr version to use") -) - -func getEnv(name, def string) string { - if v := os.Getenv(name); v != "" { - return v - } - return def -} - -// Init performs common initialization for a gen command. It parses the flags -// and sets up the standard logging parameters. 
-func Init() { - log.SetPrefix("") - log.SetFlags(log.Lshortfile) - flag.Parse() -} - -const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. - -package %s - -` - -// UnicodeVersion reports the requested Unicode version. -func UnicodeVersion() string { - return *unicodeVersion -} - -// UnicodeVersion reports the requested CLDR version. -func CLDRVersion() string { - return *cldrVersion -} - -// IsLocal reports whether data files are available locally. -func IsLocal() bool { - dir, err := localReadmeFile() - if err != nil { - return false - } - if _, err = os.Stat(dir); err != nil { - return false - } - return true -} - -// OpenUCDFile opens the requested UCD file. The file is specified relative to -// the public Unicode root directory. It will call log.Fatal if there are any -// errors. -func OpenUCDFile(file string) io.ReadCloser { - return openUnicode(path.Join(*unicodeVersion, "ucd", file)) -} - -// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there -// are any errors. -func OpenCLDRCoreZip() io.ReadCloser { - return OpenUnicodeFile("cldr", *cldrVersion, "core.zip") -} - -// OpenUnicodeFile opens the requested file of the requested category from the -// root of the Unicode data archive. The file is specified relative to the -// public Unicode root directory. If version is "", it will use the default -// Unicode version. It will call log.Fatal if there are any errors. -func OpenUnicodeFile(category, version, file string) io.ReadCloser { - if version == "" { - version = UnicodeVersion() - } - return openUnicode(path.Join(category, version, file)) -} - -// OpenIANAFile opens the requested IANA file. The file is specified relative -// to the IANA root, which is typically either http://www.iana.org or the -// iana directory in the local mirror. It will call log.Fatal if there are any -// errors. 
-func OpenIANAFile(path string) io.ReadCloser { - return Open(*iana, "iana", path) -} - -var ( - dirMutex sync.Mutex - localDir string -) - -const permissions = 0755 - -func localReadmeFile() (string, error) { - p, err := build.Import("golang.org/x/text", "", build.FindOnly) - if err != nil { - return "", fmt.Errorf("Could not locate package: %v", err) - } - return filepath.Join(p.Dir, "DATA", "README"), nil -} - -func getLocalDir() string { - dirMutex.Lock() - defer dirMutex.Unlock() - - readme, err := localReadmeFile() - if err != nil { - log.Fatal(err) - } - dir := filepath.Dir(readme) - if _, err := os.Stat(readme); err != nil { - if err := os.MkdirAll(dir, permissions); err != nil { - log.Fatalf("Could not create directory: %v", err) - } - ioutil.WriteFile(readme, []byte(readmeTxt), permissions) - } - return dir -} - -const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT. - -This directory contains downloaded files used to generate the various tables -in the golang.org/x/text subrepo. - -Note that the language subtag repo (iana/assignments/language-subtag-registry) -and all other times in the iana subdirectory are not versioned and will need -to be periodically manually updated. The easiest way to do this is to remove -the entire iana directory. This is mostly of concern when updating the language -package. -` - -// Open opens subdir/path if a local directory is specified and the file exists, -// where subdir is a directory relative to the local root, or fetches it from -// urlRoot/path otherwise. It will call log.Fatal if there are any errors. -func Open(urlRoot, subdir, path string) io.ReadCloser { - file := filepath.Join(getLocalDir(), subdir, filepath.FromSlash(path)) - return open(file, urlRoot, path) -} - -func openUnicode(path string) io.ReadCloser { - file := filepath.Join(getLocalDir(), filepath.FromSlash(path)) - return open(file, *url, path) -} - -// TODO: automatically periodically update non-versioned files. 
- -func open(file, urlRoot, path string) io.ReadCloser { - if f, err := os.Open(file); err == nil { - return f - } - r := get(urlRoot, path) - defer r.Close() - b, err := ioutil.ReadAll(r) - if err != nil { - log.Fatalf("Could not download file: %v", err) - } - os.MkdirAll(filepath.Dir(file), permissions) - if err := ioutil.WriteFile(file, b, permissions); err != nil { - log.Fatalf("Could not create file: %v", err) - } - return ioutil.NopCloser(bytes.NewReader(b)) -} - -func get(root, path string) io.ReadCloser { - url := root + "/" + path - fmt.Printf("Fetching %s...", url) - defer fmt.Println(" done.") - resp, err := http.Get(url) - if err != nil { - log.Fatalf("HTTP GET: %v", err) - } - if resp.StatusCode != 200 { - log.Fatalf("Bad GET status for %q: %q", url, resp.Status) - } - return resp.Body -} - -// TODO: use Write*Version in all applicable packages. - -// WriteUnicodeVersion writes a constant for the Unicode version from which the -// tables are generated. -func WriteUnicodeVersion(w io.Writer) { - fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n") - fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion()) -} - -// WriteCLDRVersion writes a constant for the CLDR version from which the -// tables are generated. -func WriteCLDRVersion(w io.Writer) { - fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n") - fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion()) -} - -// WriteGoFile prepends a standard file comment and package statement to the -// given bytes, applies gofmt, and writes them to a file with the given name. -// It will call log.Fatal if there are any errors. 
-func WriteGoFile(filename, pkg string, b []byte) { - w, err := os.Create(filename) - if err != nil { - log.Fatalf("Could not create file %s: %v", filename, err) - } - defer w.Close() - if _, err = WriteGo(w, pkg, b); err != nil { - log.Fatalf("Error writing file %s: %v", filename, err) - } -} - -// WriteGo prepends a standard file comment and package statement to the given -// bytes, applies gofmt, and writes them to w. -func WriteGo(w io.Writer, pkg string, b []byte) (n int, err error) { - src := []byte(fmt.Sprintf(header, pkg)) - src = append(src, b...) - formatted, err := format.Source(src) - if err != nil { - // Print the generated code even in case of an error so that the - // returned error can be meaningfully interpreted. - n, _ = w.Write(src) - return n, err - } - return w.Write(formatted) -} - -// Repackage rewrites a Go file from belonging to package main to belonging to -// the given package. -func Repackage(inFile, outFile, pkg string) { - src, err := ioutil.ReadFile(inFile) - if err != nil { - log.Fatalf("reading %s: %v", inFile, err) - } - const toDelete = "package main\n\n" - i := bytes.Index(src, []byte(toDelete)) - if i < 0 { - log.Fatalf("Could not find %q in %s.", toDelete, inFile) - } - w := &bytes.Buffer{} - w.Write(src[i+len(toDelete):]) - WriteGoFile(outFile, pkg, w.Bytes()) -} diff --git a/vendor/golang.org/x/text/internal/tag/tag.go b/vendor/golang.org/x/text/internal/tag/tag.go deleted file mode 100644 index b5d3488..0000000 --- a/vendor/golang.org/x/text/internal/tag/tag.go +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package tag contains functionality handling tags and related data. -package tag // import "golang.org/x/text/internal/tag" - -import "sort" - -// An Index converts tags to a compact numeric value. -// -// All elements are of size 4. Tags may be up to 4 bytes long. 
Excess bytes can -// be used to store additional information about the tag. -type Index string - -// Elem returns the element data at the given index. -func (s Index) Elem(x int) string { - return string(s[x*4 : x*4+4]) -} - -// Index reports the index of the given key or -1 if it could not be found. -// Only the first len(key) bytes from the start of the 4-byte entries will be -// considered for the search and the first match in Index will be returned. -func (s Index) Index(key []byte) int { - n := len(key) - // search the index of the first entry with an equal or higher value than - // key in s. - index := sort.Search(len(s)/4, func(i int) bool { - return cmp(s[i*4:i*4+n], key) != -1 - }) - i := index * 4 - if cmp(s[i:i+len(key)], key) != 0 { - return -1 - } - return index -} - -// Next finds the next occurrence of key after index x, which must have been -// obtained from a call to Index using the same key. It returns x+1 or -1. -func (s Index) Next(key []byte, x int) int { - if x++; x*4 < len(s) && cmp(s[x*4:x*4+len(key)], key) == 0 { - return x - } - return -1 -} - -// cmp returns an integer comparing a and b lexicographically. -func cmp(a Index, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - for i, c := range b[:n] { - switch { - case a[i] > c: - return 1 - case a[i] < c: - return -1 - } - } - switch { - case len(a) < len(b): - return -1 - case len(a) > len(b): - return 1 - } - return 0 -} - -// Compare returns an integer comparing a and b lexicographically. -func Compare(a string, b []byte) int { - return cmp(Index(a), b) -} - -// FixCase reformats b to the same pattern of cases as form. -// If returns false if string b is malformed. 
-func FixCase(form string, b []byte) bool { - if len(form) != len(b) { - return false - } - for i, c := range b { - if form[i] <= 'Z' { - if c >= 'a' { - c -= 'z' - 'Z' - } - if c < 'A' || 'Z' < c { - return false - } - } else { - if c <= 'Z' { - c += 'z' - 'Z' - } - if c < 'a' || 'z' < c { - return false - } - } - b[i] = c - } - return true -} diff --git a/vendor/golang.org/x/text/internal/triegen/compact.go b/vendor/golang.org/x/text/internal/triegen/compact.go deleted file mode 100644 index 397b975..0000000 --- a/vendor/golang.org/x/text/internal/triegen/compact.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package triegen - -// This file defines Compacter and its implementations. - -import "io" - -// A Compacter generates an alternative, more space-efficient way to store a -// trie value block. A trie value block holds all possible values for the last -// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block -// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0). -type Compacter interface { - // Size returns whether the Compacter could encode the given block as well - // as its size in case it can. len(v) is always 64. - Size(v []uint64) (sz int, ok bool) - - // Store stores the block using the Compacter's compression method. - // It returns a handle with which the block can be retrieved. - // len(v) is always 64. - Store(v []uint64) uint32 - - // Print writes the data structures associated to the given store to w. - Print(w io.Writer) error - - // Handler returns the name of a function that gets called during trie - // lookup for blocks generated by the Compacter. 
The function should be of - // the form func (n uint32, b byte) uint64, where n is the index returned by - // the Compacter's Store method and b is the last byte of the UTF-8 - // encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the - // block. - Handler() string -} - -// simpleCompacter is the default Compacter used by builder. It implements a -// normal trie block. -type simpleCompacter builder - -func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) { - return blockSize * b.ValueSize, true -} - -func (b *simpleCompacter) Store(v []uint64) uint32 { - h := uint32(len(b.ValueBlocks) - blockOffset) - b.ValueBlocks = append(b.ValueBlocks, v) - return h -} - -func (b *simpleCompacter) Print(io.Writer) error { - // Structures are printed in print.go. - return nil -} - -func (b *simpleCompacter) Handler() string { - panic("Handler should be special-cased for this Compacter") -} diff --git a/vendor/golang.org/x/text/internal/triegen/print.go b/vendor/golang.org/x/text/internal/triegen/print.go deleted file mode 100644 index 8d9f120..0000000 --- a/vendor/golang.org/x/text/internal/triegen/print.go +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package triegen - -import ( - "bytes" - "fmt" - "io" - "strings" - "text/template" -) - -// print writes all the data structures as well as the code necessary to use the -// trie to w. -func (b *builder) print(w io.Writer) error { - b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize - b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize - b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize - b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize - b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize - - // If we only have one root trie, all starter blocks are at position 0 and - // we can access the arrays directly. 
- if len(b.Trie) == 1 { - // At this point we cannot refer to the generated tables directly. - b.ASCIIBlock = b.Name + "Values" - b.StarterBlock = b.Name + "Index" - } else { - // Otherwise we need to have explicit starter indexes in the trie - // structure. - b.ASCIIBlock = "t.ascii" - b.StarterBlock = "t.utf8Start" - } - - b.SourceType = "[]byte" - if err := lookupGen.Execute(w, b); err != nil { - return err - } - - b.SourceType = "string" - if err := lookupGen.Execute(w, b); err != nil { - return err - } - - if err := trieGen.Execute(w, b); err != nil { - return err - } - - for _, c := range b.Compactions { - if err := c.c.Print(w); err != nil { - return err - } - } - - return nil -} - -func printValues(n int, values []uint64) string { - w := &bytes.Buffer{} - boff := n * blockSize - fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff) - var newline bool - for i, v := range values { - if i%6 == 0 { - newline = true - } - if v != 0 { - if newline { - fmt.Fprintf(w, "\n") - newline = false - } - fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v) - } - } - return w.String() -} - -func printIndex(b *builder, nr int, n *node) string { - w := &bytes.Buffer{} - boff := nr * blockSize - fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff) - var newline bool - for i, c := range n.children { - if i%8 == 0 { - newline = true - } - if c != nil { - v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index) - if v != 0 { - if newline { - fmt.Fprintf(w, "\n") - newline = false - } - fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v) - } - } - } - return w.String() -} - -var ( - trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{ - "printValues": printValues, - "printIndex": printIndex, - "title": strings.Title, - "dec": func(x int) int { return x - 1 }, - "psize": func(n int) string { - return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024) - }, - }).Parse(trieTemplate)) - lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate)) -) - 
-// TODO: consider the return type of lookup. It could be uint64, even if the -// internal value type is smaller. We will have to verify this with the -// performance of unicode/norm, which is very sensitive to such changes. -const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}} -// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}. -type {{.Name}}Trie struct { {{if $multi}} - ascii []{{.ValueType}} // index for ASCII bytes - utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0 -{{end}}} - -func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}} - h := {{.Name}}TrieHandles[i] - return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] } -} - -type {{.Name}}TrieHandle struct { - ascii, multi {{.IndexType}} -} - -// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes -var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{ -{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}} -{{end}}}{{else}} - return &{{.Name}}Trie{} -} -{{end}} -// lookupValue determines the type of block n and looks up the value for b. -func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} { - switch { {{range $i, $c := .Compactions}} - {{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}} - n -= {{$c.Offset}}{{end}} - return {{print $b.ValueType}}({{$c.Handler}}){{end}} - } -} - -// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes -// The third block is the zero block. -var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} { -{{range $i, $v := .ValueBlocks}}{{printValues $i $v}} -{{end}}} - -// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes -// Block 0 is the zero block. 
-var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} { -{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}} -{{end}}} -` - -// TODO: consider allowing zero-length strings after evaluating performance with -// unicode/norm. -const lookupTemplate = ` -// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and -// the width in bytes of this encoding. The size will be 0 if s does not -// hold enough bytes to complete the encoding. len(s) must be greater than 0. -func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) { - c0 := s[0] - switch { - case c0 < 0x80: // is ASCII - return {{.ASCIIBlock}}[c0], 1 - case c0 < 0xC2: - return 0, 1 // Illegal UTF-8: not a starter, not ASCII. - case c0 < 0xE0: // 2-byte UTF-8 - if len(s) < 2 { - return 0, 0 - } - i := {{.StarterBlock}}[c0] - c1 := s[1] - if c1 < 0x80 || 0xC0 <= c1 { - return 0, 1 // Illegal UTF-8: not a continuation byte. - } - return t.lookupValue(uint32(i), c1), 2 - case c0 < 0xF0: // 3-byte UTF-8 - if len(s) < 3 { - return 0, 0 - } - i := {{.StarterBlock}}[c0] - c1 := s[1] - if c1 < 0x80 || 0xC0 <= c1 { - return 0, 1 // Illegal UTF-8: not a continuation byte. - } - o := uint32(i)<<6 + uint32(c1) - i = {{.Name}}Index[o] - c2 := s[2] - if c2 < 0x80 || 0xC0 <= c2 { - return 0, 2 // Illegal UTF-8: not a continuation byte. - } - return t.lookupValue(uint32(i), c2), 3 - case c0 < 0xF8: // 4-byte UTF-8 - if len(s) < 4 { - return 0, 0 - } - i := {{.StarterBlock}}[c0] - c1 := s[1] - if c1 < 0x80 || 0xC0 <= c1 { - return 0, 1 // Illegal UTF-8: not a continuation byte. - } - o := uint32(i)<<6 + uint32(c1) - i = {{.Name}}Index[o] - c2 := s[2] - if c2 < 0x80 || 0xC0 <= c2 { - return 0, 2 // Illegal UTF-8: not a continuation byte. - } - o = uint32(i)<<6 + uint32(c2) - i = {{.Name}}Index[o] - c3 := s[3] - if c3 < 0x80 || 0xC0 <= c3 { - return 0, 3 // Illegal UTF-8: not a continuation byte. 
- } - return t.lookupValue(uint32(i), c3), 4 - } - // Illegal rune - return 0, 1 -} - -// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s. -// s must start with a full and valid UTF-8 encoded rune. -func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} { - c0 := s[0] - if c0 < 0x80 { // is ASCII - return {{.ASCIIBlock}}[c0] - } - i := {{.StarterBlock}}[c0] - if c0 < 0xE0 { // 2-byte UTF-8 - return t.lookupValue(uint32(i), s[1]) - } - i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])] - if c0 < 0xF0 { // 3-byte UTF-8 - return t.lookupValue(uint32(i), s[2]) - } - i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])] - if c0 < 0xF8 { // 4-byte UTF-8 - return t.lookupValue(uint32(i), s[3]) - } - return 0 -} -` diff --git a/vendor/golang.org/x/text/internal/triegen/triegen.go b/vendor/golang.org/x/text/internal/triegen/triegen.go deleted file mode 100644 index adb0108..0000000 --- a/vendor/golang.org/x/text/internal/triegen/triegen.go +++ /dev/null @@ -1,494 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package triegen implements a code generator for a trie for associating -// unsigned integer values with UTF-8 encoded runes. -// -// Many of the go.text packages use tries for storing per-rune information. A -// trie is especially useful if many of the runes have the same value. If this -// is the case, many blocks can be expected to be shared allowing for -// information on many runes to be stored in little space. -// -// As most of the lookups are done directly on []byte slices, the tries use the -// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to -// runes and contributes a little bit to better performance. It also naturally -// provides a fast path for ASCII. -// -// Space is also an issue. 
There are many code points defined in Unicode and as -// a result tables can get quite large. So every byte counts. The triegen -// package automatically chooses the smallest integer values to represent the -// tables. Compacters allow further compression of the trie by allowing for -// alternative representations of individual trie blocks. -// -// triegen allows generating multiple tries as a single structure. This is -// useful when, for example, one wants to generate tries for several languages -// that have a lot of values in common. Some existing libraries for -// internationalization store all per-language data as a dynamically loadable -// chunk. The go.text packages are designed with the assumption that the user -// typically wants to compile in support for all supported languages, in line -// with the approach common to Go to create a single standalone binary. The -// multi-root trie approach can give significant storage savings in this -// scenario. -// -// triegen generates both tables and code. The code is optimized to use the -// automatically chosen data types. The following code is generated for a Trie -// or multiple Tries named "foo": -// - type fooTrie -// The trie type. -// -// - func newFooTrie(x int) *fooTrie -// Trie constructor, where x is the index of the trie passed to Gen. -// -// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) -// The lookup method, where uintX is automatically chosen. -// -// - func lookupString, lookupUnsafe and lookupStringUnsafe -// Variants of the above. -// -// - var fooValues and fooIndex and any tables generated by Compacters. -// The core trie data. -// -// - var fooTrieHandles -// Indexes of starter blocks in case of multiple trie roots. -// -// It is recommended that users test the generated trie by checking the returned -// value for every rune. Such exhaustive tests are possible as the the number of -// runes in Unicode is limited. 
-package triegen // import "golang.org/x/text/internal/triegen" - -// TODO: Arguably, the internally optimized data types would not have to be -// exposed in the generated API. We could also investigate not generating the -// code, but using it through a package. We would have to investigate the impact -// on performance of making such change, though. For packages like unicode/norm, -// small changes like this could tank performance. - -import ( - "encoding/binary" - "fmt" - "hash/crc64" - "io" - "log" - "unicode/utf8" -) - -// builder builds a set of tries for associating values with runes. The set of -// tries can share common index and value blocks. -type builder struct { - Name string - - // ValueType is the type of the trie values looked up. - ValueType string - - // ValueSize is the byte size of the ValueType. - ValueSize int - - // IndexType is the type of trie index values used for all UTF-8 bytes of - // a rune except the last one. - IndexType string - - // IndexSize is the byte size of the IndexType. - IndexSize int - - // SourceType is used when generating the lookup functions. If the user - // requests StringSupport, all lookup functions will be generated for - // string input as well. - SourceType string - - Trie []*Trie - - IndexBlocks []*node - ValueBlocks [][]uint64 - Compactions []compaction - Checksum uint64 - - ASCIIBlock string - StarterBlock string - - indexBlockIdx map[uint64]int - valueBlockIdx map[uint64]nodeIndex - asciiBlockIdx map[uint64]int - - // Stats are used to fill out the template. - Stats struct { - NValueEntries int - NValueBytes int - NIndexEntries int - NIndexBytes int - NHandleBytes int - } - - err error -} - -// A nodeIndex encodes the index of a node, which is defined by the compaction -// which stores it and an index within the compaction. For internal nodes, the -// compaction is always 0. -type nodeIndex struct { - compaction int - index int -} - -// compaction keeps track of stats used for the compaction. 
-type compaction struct { - c Compacter - blocks []*node - maxHandle uint32 - totalSize int - - // Used by template-based generator and thus exported. - Cutoff uint32 - Offset uint32 - Handler string -} - -func (b *builder) setError(err error) { - if b.err == nil { - b.err = err - } -} - -// An Option can be passed to Gen. -type Option func(b *builder) error - -// Compact configures the trie generator to use the given Compacter. -func Compact(c Compacter) Option { - return func(b *builder) error { - b.Compactions = append(b.Compactions, compaction{ - c: c, - Handler: c.Handler() + "(n, b)"}) - return nil - } -} - -// Gen writes Go code for a shared trie lookup structure to w for the given -// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will -// return the *nameTrie for tries[x]. A value can be looked up by using one of -// the various lookup methods defined on nameTrie. It returns the table size of -// the generated trie. -func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) { - // The index contains two dummy blocks, followed by the zero block. The zero - // block is at offset 0x80, so that the offset for the zero block for - // continuation bytes is 0. - b := &builder{ - Name: name, - Trie: tries, - IndexBlocks: []*node{{}, {}, {}}, - Compactions: []compaction{{ - Handler: name + "Values[n<<6+uint32(b)]", - }}, - // The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero - // block. - indexBlockIdx: map[uint64]int{0: 0}, - valueBlockIdx: map[uint64]nodeIndex{0: {}}, - asciiBlockIdx: map[uint64]int{}, - } - b.Compactions[0].c = (*simpleCompacter)(b) - - for _, f := range opts { - if err := f(b); err != nil { - return 0, err - } - } - b.build() - if b.err != nil { - return 0, b.err - } - if err = b.print(w); err != nil { - return 0, err - } - return b.Size(), nil -} - -// A Trie represents a single root node of a trie. A builder may build several -// overlapping tries at once. 
-type Trie struct { - root *node - - hiddenTrie -} - -// hiddenTrie contains values we want to be visible to the template generator, -// but hidden from the API documentation. -type hiddenTrie struct { - Name string - Checksum uint64 - ASCIIIndex int - StarterIndex int -} - -// NewTrie returns a new trie root. -func NewTrie(name string) *Trie { - return &Trie{ - &node{ - children: make([]*node, blockSize), - values: make([]uint64, utf8.RuneSelf), - }, - hiddenTrie{Name: name}, - } -} - -// Gen is a convenience wrapper around the Gen func passing t as the only trie -// and uses the name passed to NewTrie. It returns the size of the generated -// tables. -func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) { - return Gen(w, t.Name, []*Trie{t}, opts...) -} - -// node is a node of the intermediate trie structure. -type node struct { - // children holds this node's children. It is always of length 64. - // A child node may be nil. - children []*node - - // values contains the values of this node. If it is non-nil, this node is - // either a root or leaf node: - // For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F]. - // For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF]. - values []uint64 - - index nodeIndex -} - -// Insert associates value with the given rune. Insert will panic if a non-zero -// value is passed for an invalid rune. -func (t *Trie) Insert(r rune, value uint64) { - if value == 0 { - return - } - s := string(r) - if []rune(s)[0] != r && value != 0 { - // Note: The UCD tables will always assign what amounts to a zero value - // to a surrogate. Allowing a zero value for an illegal rune allows - // users to iterate over [0..MaxRune] without having to explicitly - // exclude surrogates, which would be tedious. - panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r)) - } - if len(s) == 1 { - // It is a root node value (ASCII). 
- t.root.values[s[0]] = value - return - } - - n := t.root - for ; len(s) > 1; s = s[1:] { - if n.children == nil { - n.children = make([]*node, blockSize) - } - p := s[0] % blockSize - c := n.children[p] - if c == nil { - c = &node{} - n.children[p] = c - } - if len(s) > 2 && c.values != nil { - log.Fatalf("triegen: insert(%U): found internal node with values", r) - } - n = c - } - if n.values == nil { - n.values = make([]uint64, blockSize) - } - if n.children != nil { - log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r) - } - n.values[s[0]-0x80] = value -} - -// Size returns the number of bytes the generated trie will take to store. It -// needs to be exported as it is used in the templates. -func (b *builder) Size() int { - // Index blocks. - sz := len(b.IndexBlocks) * blockSize * b.IndexSize - - // Skip the first compaction, which represents the normal value blocks, as - // its totalSize does not account for the ASCII blocks, which are managed - // separately. - sz += len(b.ValueBlocks) * blockSize * b.ValueSize - for _, c := range b.Compactions[1:] { - sz += c.totalSize - } - - // TODO: this computation does not account for the fixed overhead of a using - // a compaction, either code or data. As for data, though, the typical - // overhead of data is in the order of bytes (2 bytes for cases). Further, - // the savings of using a compaction should anyway be substantial for it to - // be worth it. - - // For multi-root tries, we also need to account for the handles. - if len(b.Trie) > 1 { - sz += 2 * b.IndexSize * len(b.Trie) - } - return sz -} - -func (b *builder) build() { - // Compute the sizes of the values. - var vmax uint64 - for _, t := range b.Trie { - vmax = maxValue(t.root, vmax) - } - b.ValueType, b.ValueSize = getIntType(vmax) - - // Compute all block allocations. - // TODO: first compute the ASCII blocks for all tries and then the other - // nodes. 
ASCII blocks are more restricted in placement, as they require two - // blocks to be placed consecutively. Processing them first may improve - // sharing (at least one zero block can be expected to be saved.) - for _, t := range b.Trie { - b.Checksum += b.buildTrie(t) - } - - // Compute the offsets for all the Compacters. - offset := uint32(0) - for i := range b.Compactions { - c := &b.Compactions[i] - c.Offset = offset - offset += c.maxHandle + 1 - c.Cutoff = offset - } - - // Compute the sizes of indexes. - // TODO: different byte positions could have different sizes. So far we have - // not found a case where this is beneficial. - imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff) - for _, ib := range b.IndexBlocks { - if x := uint64(ib.index.index); x > imax { - imax = x - } - } - b.IndexType, b.IndexSize = getIntType(imax) -} - -func maxValue(n *node, max uint64) uint64 { - if n == nil { - return max - } - for _, c := range n.children { - max = maxValue(c, max) - } - for _, v := range n.values { - if max < v { - max = v - } - } - return max -} - -func getIntType(v uint64) (string, int) { - switch { - case v < 1<<8: - return "uint8", 1 - case v < 1<<16: - return "uint16", 2 - case v < 1<<32: - return "uint32", 4 - } - return "uint64", 8 -} - -const ( - blockSize = 64 - - // Subtract two blocks to offset 0x80, the first continuation byte. - blockOffset = 2 - - // Subtract three blocks to offset 0xC0, the first non-ASCII starter. - rootBlockOffset = 3 -) - -var crcTable = crc64.MakeTable(crc64.ISO) - -func (b *builder) buildTrie(t *Trie) uint64 { - n := t.root - - // Get the ASCII offset. For the first trie, the ASCII block will be at - // position 0. 
- hasher := crc64.New(crcTable) - binary.Write(hasher, binary.BigEndian, n.values) - hash := hasher.Sum64() - - v, ok := b.asciiBlockIdx[hash] - if !ok { - v = len(b.ValueBlocks) - b.asciiBlockIdx[hash] = v - - b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:]) - if v == 0 { - // Add the zero block at position 2 so that it will be assigned a - // zero reference in the lookup blocks. - // TODO: always do this? This would allow us to remove a check from - // the trie lookup, but at the expense of extra space. Analyze - // performance for unicode/norm. - b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize)) - } - } - t.ASCIIIndex = v - - // Compute remaining offsets. - t.Checksum = b.computeOffsets(n, true) - // We already subtracted the normal blockOffset from the index. Subtract the - // difference for starter bytes. - t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset) - return t.Checksum -} - -func (b *builder) computeOffsets(n *node, root bool) uint64 { - // For the first trie, the root lookup block will be at position 3, which is - // the offset for UTF-8 non-ASCII starter bytes. - first := len(b.IndexBlocks) == rootBlockOffset - if first { - b.IndexBlocks = append(b.IndexBlocks, n) - } - - // We special-case the cases where all values recursively are 0. This allows - // for the use of a zero block to which all such values can be directed. - hash := uint64(0) - if n.children != nil || n.values != nil { - hasher := crc64.New(crcTable) - for _, c := range n.children { - var v uint64 - if c != nil { - v = b.computeOffsets(c, false) - } - binary.Write(hasher, binary.BigEndian, v) - } - binary.Write(hasher, binary.BigEndian, n.values) - hash = hasher.Sum64() - } - - if first { - b.indexBlockIdx[hash] = rootBlockOffset - blockOffset - } - - // Compacters don't apply to internal nodes. 
- if n.children != nil { - v, ok := b.indexBlockIdx[hash] - if !ok { - v = len(b.IndexBlocks) - blockOffset - b.IndexBlocks = append(b.IndexBlocks, n) - b.indexBlockIdx[hash] = v - } - n.index = nodeIndex{0, v} - } else { - h, ok := b.valueBlockIdx[hash] - if !ok { - bestI, bestSize := 0, blockSize*b.ValueSize - for i, c := range b.Compactions[1:] { - if sz, ok := c.c.Size(n.values); ok && bestSize > sz { - bestI, bestSize = i+1, sz - } - } - c := &b.Compactions[bestI] - c.totalSize += bestSize - v := c.c.Store(n.values) - if c.maxHandle < v { - c.maxHandle = v - } - h = nodeIndex{bestI, int(v)} - b.valueBlockIdx[hash] = h - } - n.index = h - } - return hash -} diff --git a/vendor/golang.org/x/text/internal/ucd/ucd.go b/vendor/golang.org/x/text/internal/ucd/ucd.go deleted file mode 100644 index 8c45b5f..0000000 --- a/vendor/golang.org/x/text/internal/ucd/ucd.go +++ /dev/null @@ -1,371 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package ucd provides a parser for Unicode Character Database files, the -// format of which is defined in http://www.unicode.org/reports/tr44/. See -// http://www.unicode.org/Public/UCD/latest/ucd/ for example files. -// -// It currently does not support substitutions of missing fields. -package ucd // import "golang.org/x/text/internal/ucd" - -import ( - "bufio" - "errors" - "fmt" - "io" - "log" - "regexp" - "strconv" - "strings" -) - -// UnicodeData.txt fields. -const ( - CodePoint = iota - Name - GeneralCategory - CanonicalCombiningClass - BidiClass - DecompMapping - DecimalValue - DigitValue - NumericValue - BidiMirrored - Unicode1Name - ISOComment - SimpleUppercaseMapping - SimpleLowercaseMapping - SimpleTitlecaseMapping -) - -// Parse calls f for each entry in the given reader of a UCD file. It will close -// the reader upon return. It will call log.Fatal if any error occurred. 
//
// This implements the most common usage pattern of using Parser.
//
// r is closed before Parse returns. If the parser reports an error, it is
// passed to log.Fatal, which terminates the program.
func Parse(r io.ReadCloser, f func(p *Parser)) {
	defer r.Close()

	p := New(r)
	for p.Next() {
		f(p)
	}
	if err := p.Err(); err != nil {
		r.Close() // os.Exit will cause defers not to be called.
		log.Fatal(err)
	}
}

// An Option is used to configure a Parser.
type Option func(p *Parser)

// keepRanges is the implementation of the KeepRanges option.
func keepRanges(p *Parser) {
	p.keepRanges = true
}

var (
	// KeepRanges prevents the expansion of ranges. The raw ranges can be
	// obtained by calling Range(0) on the parser.
	KeepRanges Option = keepRanges
)

// The Part option registers a handler for lines starting with a '@'. The text
// after a '@' is available as the first field. Comments are handled as usual.
func Part(f func(p *Parser)) Option {
	return func(p *Parser) {
		p.partHandler = f
	}
}

// The CommentHandler option passes comments that are on a line by itself to
// a given handler.
func CommentHandler(f func(s string)) Option {
	return func(p *Parser) {
		p.commentHandler = f
	}
}

// A Parser parses Unicode Character Database (UCD) files.
type Parser struct {
	scanner *bufio.Scanner

	keepRanges bool // Don't expand rune ranges in field 0.

	err     error    // First error recorded by setError; later ones are dropped.
	comment string   // Trailing '#' comment of the current line, trimmed.
	field   []string // Trimmed, ';'-separated fields of the current line.
	line    int      // Line counter used to prefix error messages.

	// parsedRange is needed in case Range(0) is called more than once for one
	// field. In some cases this requires scanning ahead.
	parsedRange          bool
	rangeStart, rangeEnd rune

	partHandler    func(p *Parser)
	commentHandler func(s string)
}

// setError records err, prefixed with the current line number and the optional
// msg, as the parser's error. Only the first non-nil error is kept.
func (p *Parser) setError(err error, msg string) {
	if p.err != nil || err == nil {
		return
	}
	if msg == "" {
		p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err)
		return
	}
	p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err)
}

// getField returns field i of the current line, or "" if the line has fewer
// than i+1 fields.
func (p *Parser) getField(i int) string {
	if i >= len(p.field) {
		return ""
	}
	return p.field[i]
}

// Err returns a non-nil error if any error occurred during parsing.
func (p *Parser) Err() error {
	return p.err
}

// New returns a Parser for the given Reader.
func New(r io.Reader, o ...Option) *Parser {
	p := &Parser{
		scanner: bufio.NewScanner(r),
	}
	for _, f := range o {
		f(p)
	}
	return p
}

// Next parses the next line in the file. It returns true if a line was parsed
// and false if it reached the end of the file or an error was recorded.
//
// Unless KeepRanges is set, a line whose first field is a rune range is
// reported once per rune: Next advances to the next rune of the range without
// reading a new line until the range is exhausted.
func (p *Parser) Next() bool {
	if !p.keepRanges && p.rangeStart < p.rangeEnd {
		p.rangeStart++
		return true
	}
	p.comment = ""
	p.field = p.field[:0]
	p.parsedRange = false

	for p.scanner.Scan() && p.err == nil {
		p.line++
		s := p.scanner.Text()
		if s == "" {
			continue
		}
		if s[0] == '#' {
			if p.commentHandler != nil {
				p.commentHandler(strings.TrimSpace(s[1:]))
			}
			continue
		}

		// Parse line. A '#' can only be found at index 1 or later here, so s
		// stays non-empty after the trailing comment is cut off.
		if i := strings.IndexByte(s, '#'); i != -1 {
			p.comment = strings.TrimSpace(s[i+1:])
			s = s[:i]
		}
		if s[0] == '@' {
			if p.partHandler != nil {
				p.field = append(p.field, strings.TrimSpace(s[1:]))
				p.partHandler(p)
				p.field = p.field[:0]
			}
			p.comment = ""
			continue
		}
		for {
			i := strings.IndexByte(s, ';')
			if i == -1 {
				p.field = append(p.field, strings.TrimSpace(s))
				break
			}
			p.field = append(p.field, strings.TrimSpace(s[:i]))
			s = s[i+1:]
		}
		if !p.keepRanges {
			p.rangeStart, p.rangeEnd = p.getRange(0)
		}
		return true
	}
	p.setError(p.scanner.Err(), "scanner failed")
	return false
}

// parseRune parses b as a hexadecimal code point, with an optional "U+"
// prefix.
func parseRune(b string) (rune, error) {
	if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
		b = b[2:]
	}
	x, err := strconv.ParseUint(b, 16, 32)
	return rune(x), err
}

// parseRune parses s as a rune and records any failure as the parser's error.
func (p *Parser) parseRune(s string) rune {
	x, err := parseRune(s)
	p.setError(err, "failed to parse rune")
	return x
}

// Rune parses and returns field i as a rune.
//
// For field 0 with range expansion enabled, the current rune of the expanded
// range is returned instead of re-parsing the field.
func (p *Parser) Rune(i int) rune {
	if i > 0 || p.keepRanges {
		return p.parseRune(p.getField(i))
	}
	return p.rangeStart
}

// Runes interprets and returns field i as a sequence of runes.
func (p *Parser) Runes(i int) (runes []rune) {
	add := func(s string) {
		if s = strings.TrimSpace(s); len(s) > 0 {
			runes = append(runes, p.parseRune(s))
		}
	}
	for b := p.getField(i); ; {
		i := strings.IndexByte(b, ' ')
		if i == -1 {
			add(b)
			break
		}
		add(b[:i])
		b = b[i+1:]
	}
	return
}

var (
	errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")

	// reRange matches one line of a legacy rune range.
	reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
)

// Range parses and returns field i as a rune range. A range is inclusive at
// both ends. If the field only has one rune, first and last will be identical.
// It supports the legacy format for ranges used in UnicodeData.txt.
//
// NOTE: unless KeepRanges is in effect, i is ignored and the current rune of
// the expanded range of field 0 is returned as both first and last.
func (p *Parser) Range(i int) (first, last rune) {
	if !p.keepRanges {
		return p.rangeStart, p.rangeStart
	}
	return p.getRange(i)
}

// getRange parses field i as either "first..last", a single rune, or, for
// field 0 only, the first line of a legacy "<Name, First>"/"<Name, Last>"
// pair. In the legacy case it scans ahead to the matching "Last" line.
func (p *Parser) getRange(i int) (first, last rune) {
	b := p.getField(i)
	if k := strings.Index(b, ".."); k != -1 {
		return p.parseRune(b[:k]), p.parseRune(b[k+2:])
	}
	// The first field may not be a rune, in which case we may ignore any error
	// and set the range as 0..0.
	x, err := parseRune(b)
	if err != nil {
		// Disable range parsing henceforth. This ensures that an error will be
		// returned if the user subsequently will try to parse this field as
		// a Rune.
		p.keepRanges = true
	}
	// Special case for UnicodeData that was retained for backwards compatibility.
	if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") {
		if p.parsedRange {
			return p.rangeStart, p.rangeEnd
		}
		mf := reRange.FindStringSubmatch(p.scanner.Text())
		p.line++
		if mf == nil || !p.scanner.Scan() {
			p.setError(errIncorrectLegacyRange, "")
			return x, x
		}
		// Using Bytes would be more efficient here, but Text is a lot easier
		// and this is not a frequent case.
		ml := reRange.FindStringSubmatch(p.scanner.Text())
		if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
			p.setError(errIncorrectLegacyRange, "")
			return x, x
		}
		// ml[1] is the leading code point of the "Last" line.
		p.rangeStart, p.rangeEnd = x, p.parseRune(ml[1])
		p.parsedRange = true
		return p.rangeStart, p.rangeEnd
	}
	return x, x
}

// bools recognizes all valid UCD boolean values.
var bools = map[string]bool{
	"":      false,
	"N":     false,
	"No":    false,
	"F":     false,
	"False": false,
	"Y":     true,
	"Yes":   true,
	"T":     true,
	"True":  true,
}

// Bool parses and returns field i as a boolean value. An empty or missing
// field is false; an unrecognized value records a syntax error.
func (p *Parser) Bool(i int) bool {
	if v, ok := bools[p.getField(i)]; ok {
		return v
	}
	p.setError(strconv.ErrSyntax, "error parsing bool")
	return false
}

// Int parses and returns field i as an integer value.
func (p *Parser) Int(i int) int {
	// A bitSize of 0 makes ParseInt check the value against the range of int,
	// so values that do not fit are reported instead of silently truncated.
	x, err := strconv.ParseInt(p.getField(i), 10, 0)
	p.setError(err, "error parsing int")
	return int(x)
}

// Uint parses and returns field i as an unsigned integer value.
func (p *Parser) Uint(i int) uint {
	// A bitSize of 0 makes ParseUint check the value against the range of
	// uint, so values that do not fit are reported instead of truncated.
	x, err := strconv.ParseUint(p.getField(i), 10, 0)
	p.setError(err, "error parsing uint")
	return uint(x)
}

// Float parses and returns field i as a decimal value.
func (p *Parser) Float(i int) float64 {
	x, err := strconv.ParseFloat(p.getField(i), 64)
	p.setError(err, "error parsing float")
	return x
}

// String parses and returns field i as a string value.
func (p *Parser) String(i int) string {
	return p.getField(i)
}

// Strings parses and returns field i as a space-separated list of strings.
func (p *Parser) Strings(i int) []string {
	ss := strings.Split(p.getField(i), " ")
	for i, s := range ss {
		ss[i] = strings.TrimSpace(s)
	}
	return ss
}

// Comment returns the comments for the current line.
func (p *Parser) Comment() string {
	return p.comment
}

var errUndefinedEnum = errors.New("ucd: undefined enum value")

// Enum interprets and returns field i as a value that must be one of the values
// in enum. If it is not, an error is recorded and "" is returned.
func (p *Parser) Enum(i int, enum ...string) string {
	f := p.getField(i)
	for _, s := range enum {
		if f == s {
			return s
		}
	}
	p.setError(errUndefinedEnum, "error parsing enum")
	return ""
}