aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/collate/build/colelem.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/collate/build/colelem.go')
-rw-r--r--vendor/golang.org/x/text/collate/build/colelem.go294
1 files changed, 294 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/collate/build/colelem.go b/vendor/golang.org/x/text/collate/build/colelem.go
new file mode 100644
index 0000000..726fe54
--- /dev/null
+++ b/vendor/golang.org/x/text/collate/build/colelem.go
@@ -0,0 +1,294 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package build
+
+import (
+ "fmt"
+ "unicode"
+
+ "golang.org/x/text/internal/colltab"
+)
+
+const (
+ defaultSecondary = 0x20
+ defaultTertiary = 0x2
+ maxTertiary = 0x1F
+)
+
+type rawCE struct {
+ w []int
+ ccc uint8
+}
+
+func makeRawCE(w []int, ccc uint8) rawCE {
+ ce := rawCE{w: make([]int, 4), ccc: ccc}
+ copy(ce.w, w)
+ return ce
+}
+
+// A collation element is represented as an uint32.
+// In the typical case, a rune maps to a single collation element. If a rune
+// can be the start of a contraction or expands into multiple collation elements,
+// then the collation element that is associated with a rune will have a special
+// form to represent such m to n mappings. Such special collation elements
+// have a value >= 0x80000000.
+
+const (
+ maxPrimaryBits = 21
+ maxSecondaryBits = 12
+ maxTertiaryBits = 8
+)
+
+func makeCE(ce rawCE) (uint32, error) {
+ v, e := colltab.MakeElem(ce.w[0], ce.w[1], ce.w[2], ce.ccc)
+ return uint32(v), e
+}
+
+// For contractions, collation elements are of the form
+// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
+// - n* is the size of the first node in the contraction trie.
+// - i* is the index of the first node in the contraction trie.
+// - b* is the offset into the contraction collation element table.
+// See contract.go for details on the contraction trie.
+const (
+ contractID = 0xC0000000
+ maxNBits = 4
+ maxTrieIndexBits = 12
+ maxContractOffsetBits = 13
+)
+
+func makeContractIndex(h ctHandle, offset int) (uint32, error) {
+ if h.n >= 1<<maxNBits {
+ return 0, fmt.Errorf("size of contraction trie node too large: %d >= %d", h.n, 1<<maxNBits)
+ }
+ if h.index >= 1<<maxTrieIndexBits {
+ return 0, fmt.Errorf("size of contraction trie offset too large: %d >= %d", h.index, 1<<maxTrieIndexBits)
+ }
+ if offset >= 1<<maxContractOffsetBits {
+ return 0, fmt.Errorf("contraction offset out of bounds: %x >= %x", offset, 1<<maxContractOffsetBits)
+ }
+ ce := uint32(contractID)
+ ce += uint32(offset << (maxNBits + maxTrieIndexBits))
+ ce += uint32(h.index << maxNBits)
+ ce += uint32(h.n)
+ return ce, nil
+}
+
+// For expansions, collation elements are of the form
+// 11100000 00000000 bbbbbbbb bbbbbbbb,
+// where b* is the index into the expansion sequence table.
+const (
+ expandID = 0xE0000000
+ maxExpandIndexBits = 16
+)
+
+func makeExpandIndex(index int) (uint32, error) {
+ if index >= 1<<maxExpandIndexBits {
+ return 0, fmt.Errorf("expansion index out of bounds: %x >= %x", index, 1<<maxExpandIndexBits)
+ }
+ return expandID + uint32(index), nil
+}
+
+// Each list of collation elements corresponding to an expansion starts with
+// a header indicating the length of the sequence.
+func makeExpansionHeader(n int) (uint32, error) {
+ return uint32(n), nil
+}
+
+// Some runes can be expanded using NFKD decomposition. Instead of storing the full
+// sequence of collation elements, we decompose the rune and lookup the collation
+// elements for each rune in the decomposition and modify the tertiary weights.
+// The collation element, in this case, is of the form
+// 11110000 00000000 wwwwwwww vvvvvvvv, where
+// - v* is the replacement tertiary weight for the first rune,
+// - w* is the replacement tertiary weight for the second rune,
+// Tertiary weights of subsequent runes should be replaced with maxTertiary.
+// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
+const (
+ decompID = 0xF0000000
+)
+
+func makeDecompose(t1, t2 int) (uint32, error) {
+ if t1 >= 256 || t1 < 0 {
+ return 0, fmt.Errorf("first tertiary weight out of bounds: %d >= 256", t1)
+ }
+ if t2 >= 256 || t2 < 0 {
+ return 0, fmt.Errorf("second tertiary weight out of bounds: %d >= 256", t2)
+ }
+ return uint32(t2<<8+t1) + decompID, nil
+}
+
+const (
+ // These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
+ minUnified rune = 0x4E00
+ maxUnified = 0x9FFF
+ minCompatibility = 0xF900
+ maxCompatibility = 0xFAFF
+ minRare = 0x3400
+ maxRare = 0x4DBF
+)
+const (
+ commonUnifiedOffset = 0x10000
+ rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
+ otherOffset = 0x50000 // largest rune in rare is U+2FA1D
+ illegalOffset = otherOffset + int(unicode.MaxRune)
+ maxPrimary = illegalOffset + 1
+)
+
+// implicitPrimary returns the primary weight for the a rune
+// for which there is no entry for the rune in the collation table.
+// We take a different approach from the one specified in
+// http://unicode.org/reports/tr10/#Implicit_Weights,
+// but preserve the resulting relative ordering of the runes.
+func implicitPrimary(r rune) int {
+ if unicode.Is(unicode.Ideographic, r) {
+ if r >= minUnified && r <= maxUnified {
+ // The most common case for CJK.
+ return int(r) + commonUnifiedOffset
+ }
+ if r >= minCompatibility && r <= maxCompatibility {
+ // This will typically not hit. The DUCET explicitly specifies mappings
+ // for all characters that do not decompose.
+ return int(r) + commonUnifiedOffset
+ }
+ return int(r) + rareUnifiedOffset
+ }
+ return int(r) + otherOffset
+}
+
+// convertLargeWeights converts collation elements with large
+// primaries (either double primaries or for illegal runes)
+// to our own representation.
+// A CJK character C is represented in the DUCET as
+// [.FBxx.0020.0002.C][.BBBB.0000.0000.C]
+// We will rewrite these characters to a single CE.
+// We assume the CJK values start at 0x8000.
+// See http://unicode.org/reports/tr10/#Implicit_Weights
+func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
+ const (
+ cjkPrimaryStart = 0xFB40
+ rarePrimaryStart = 0xFB80
+ otherPrimaryStart = 0xFBC0
+ illegalPrimary = 0xFFFE
+ highBitsMask = 0x3F
+ lowBitsMask = 0x7FFF
+ lowBitsFlag = 0x8000
+ shiftBits = 15
+ )
+ for i := 0; i < len(elems); i++ {
+ ce := elems[i].w
+ p := ce[0]
+ if p < cjkPrimaryStart {
+ continue
+ }
+ if p > 0xFFFF {
+ return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p)
+ }
+ if p >= illegalPrimary {
+ ce[0] = illegalOffset + p - illegalPrimary
+ } else {
+ if i+1 >= len(elems) {
+ return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
+ }
+ if elems[i+1].w[0]&lowBitsFlag == 0 {
+ return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
+ }
+ np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
+ switch {
+ case p < rarePrimaryStart:
+ np += commonUnifiedOffset
+ case p < otherPrimaryStart:
+ np += rareUnifiedOffset
+ default:
+ p += otherOffset
+ }
+ ce[0] = np
+ for j := i + 1; j+1 < len(elems); j++ {
+ elems[j] = elems[j+1]
+ }
+ elems = elems[:len(elems)-1]
+ }
+ }
+ return elems, nil
+}
+
+// nextWeight computes the first possible collation weights following elems
+// for the given level.
+func nextWeight(level colltab.Level, elems []rawCE) []rawCE {
+ if level == colltab.Identity {
+ next := make([]rawCE, len(elems))
+ copy(next, elems)
+ return next
+ }
+ next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
+ next[0].w[level]++
+ if level < colltab.Secondary {
+ next[0].w[colltab.Secondary] = defaultSecondary
+ }
+ if level < colltab.Tertiary {
+ next[0].w[colltab.Tertiary] = defaultTertiary
+ }
+ // Filter entries that cannot influence ordering.
+ for _, ce := range elems[1:] {
+ skip := true
+ for i := colltab.Primary; i < level; i++ {
+ skip = skip && ce.w[i] == 0
+ }
+ if !skip {
+ next = append(next, ce)
+ }
+ }
+ return next
+}
+
+func nextVal(elems []rawCE, i int, level colltab.Level) (index, value int) {
+ for ; i < len(elems) && elems[i].w[level] == 0; i++ {
+ }
+ if i < len(elems) {
+ return i, elems[i].w[level]
+ }
+ return i, 0
+}
+
+// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
+// It also returns the collation level at which the difference is found.
+func compareWeights(a, b []rawCE) (result int, level colltab.Level) {
+ for level := colltab.Primary; level < colltab.Identity; level++ {
+ var va, vb int
+ for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
+ ia, va = nextVal(a, ia, level)
+ ib, vb = nextVal(b, ib, level)
+ if va != vb {
+ if va < vb {
+ return -1, level
+ } else {
+ return 1, level
+ }
+ }
+ }
+ }
+ return 0, colltab.Identity
+}
+
+func equalCE(a, b rawCE) bool {
+ for i := 0; i < 3; i++ {
+ if b.w[i] != a.w[i] {
+ return false
+ }
+ }
+ return true
+}
+
+func equalCEArrays(a, b []rawCE) bool {
+ if len(a) != len(b) {
+ return false
+ }
+ for i := range a {
+ if !equalCE(a[i], b[i]) {
+ return false
+ }
+ }
+ return true
+}