aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/collate/build
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/collate/build')
-rw-r--r--vendor/golang.org/x/text/collate/build/builder.go702
-rw-r--r--vendor/golang.org/x/text/collate/build/colelem.go294
-rw-r--r--vendor/golang.org/x/text/collate/build/contract.go309
-rw-r--r--vendor/golang.org/x/text/collate/build/order.go393
-rw-r--r--vendor/golang.org/x/text/collate/build/table.go81
-rw-r--r--vendor/golang.org/x/text/collate/build/trie.go290
6 files changed, 0 insertions, 2069 deletions
diff --git a/vendor/golang.org/x/text/collate/build/builder.go b/vendor/golang.org/x/text/collate/build/builder.go
deleted file mode 100644
index 1104284..0000000
--- a/vendor/golang.org/x/text/collate/build/builder.go
+++ /dev/null
@@ -1,702 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package build // import "golang.org/x/text/collate/build"
-
-import (
- "fmt"
- "io"
- "log"
- "sort"
- "strings"
- "unicode/utf8"
-
- "golang.org/x/text/internal/colltab"
- "golang.org/x/text/language"
- "golang.org/x/text/unicode/norm"
-)
-
-// TODO: optimizations:
-// - expandElem is currently 20K. By putting unique colElems in a separate
-// table and having a byte array of indexes into this table, we can reduce
-// the total size to about 7K. By also factoring out the length bytes, we
-// can reduce this to about 6K.
-// - trie valueBlocks are currently 100K. There are a lot of sparse blocks
-// and many consecutive values with the same stride. This can be further
-// compacted.
-// - Compress secondary weights into 8 bits.
-// - Some LDML specs specify a context element. Currently we simply concatenate
-// those. Context can be implemented using the contraction trie. If Builder
-// could analyze and detect when using a context makes sense, there is no
-// need to expose this construct in the API.
-
-// A Builder builds a root collation table. The user must specify the
-// collation elements for each entry. A common use will be to base the weights
-// on those specified in the allkeys* file as provided by the UCA or CLDR.
-type Builder struct {
- index *trieBuilder
- root ordering
- locale []*Tailoring
- t *table
- err error
- built bool
-
- minNonVar int // lowest primary recorded for a variable
- varTop int // highest primary recorded for a non-variable
-
- // indexes used for reusing expansions and contractions
- expIndex map[string]int // positions of expansions keyed by their string representation
- ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes
- ctElem map[string]int // contraction elements keyed by their string representation
-}
-
-// A Tailoring builds a collation table based on another collation table.
-// The table is defined by specifying tailorings to the underlying table.
-// See http://unicode.org/reports/tr35/ for an overview of tailoring
-// collation tables. The CLDR contains pre-defined tailorings for a variety
-// of languages (See http://www.unicode.org/Public/cldr/<version>/core.zip.)
-type Tailoring struct {
- id string
- builder *Builder
- index *ordering
-
- anchor *entry
- before bool
-}
-
-// NewBuilder returns a new Builder.
-func NewBuilder() *Builder {
- return &Builder{
- index: newTrieBuilder(),
- root: makeRootOrdering(),
- expIndex: make(map[string]int),
- ctHandle: make(map[string]ctHandle),
- ctElem: make(map[string]int),
- }
-}
-
-// Tailoring returns a Tailoring for the given locale. One should
-// have completed all calls to Add before calling Tailoring.
-func (b *Builder) Tailoring(loc language.Tag) *Tailoring {
- t := &Tailoring{
- id: loc.String(),
- builder: b,
- index: b.root.clone(),
- }
- t.index.id = t.id
- b.locale = append(b.locale, t)
- return t
-}
-
-// Add adds an entry to the collation element table, mapping
-// a slice of runes to a sequence of collation elements.
-// A collation element is specified as list of weights: []int{primary, secondary, ...}.
-// The entries are typically obtained from a collation element table
-// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format.
-// Note that the collation elements specified by colelems are only used
-// as a guide. The actual weights generated by Builder may differ.
-// The argument variables is a list of indices into colelems that should contain
-// a value for each colelem that is a variable. (See the reference above.)
-func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
- str := string(runes)
- elems := make([]rawCE, len(colelems))
- for i, ce := range colelems {
- if len(ce) == 0 {
- break
- }
- elems[i] = makeRawCE(ce, 0)
- if len(ce) == 1 {
- elems[i].w[1] = defaultSecondary
- }
- if len(ce) <= 2 {
- elems[i].w[2] = defaultTertiary
- }
- if len(ce) <= 3 {
- elems[i].w[3] = ce[0]
- }
- }
- for i, ce := range elems {
- p := ce.w[0]
- isvar := false
- for _, j := range variables {
- if i == j {
- isvar = true
- }
- }
- if isvar {
- if p >= b.minNonVar && b.minNonVar > 0 {
- return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
- }
- if p > b.varTop {
- b.varTop = p
- }
- } else if p > 1 { // 1 is a special primary value reserved for FFFE
- if p <= b.varTop {
- return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
- }
- if b.minNonVar == 0 || p < b.minNonVar {
- b.minNonVar = p
- }
- }
- }
- elems, err := convertLargeWeights(elems)
- if err != nil {
- return err
- }
- cccs := []uint8{}
- nfd := norm.NFD.String(str)
- for i := range nfd {
- cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
- }
- if len(cccs) < len(elems) {
- if len(cccs) > 2 {
- return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
- }
- p := len(elems) - 1
- for ; p > 0 && elems[p].w[0] == 0; p-- {
- elems[p].ccc = cccs[len(cccs)-1]
- }
- for ; p >= 0; p-- {
- elems[p].ccc = cccs[0]
- }
- } else {
- for i := range elems {
- elems[i].ccc = cccs[i]
- }
- }
- // doNorm in collate.go assumes that the following conditions hold.
- if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
- return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
- }
- b.root.newEntry(str, elems)
- return nil
-}
-
-func (t *Tailoring) setAnchor(anchor string) error {
- anchor = norm.NFC.String(anchor)
- a := t.index.find(anchor)
- if a == nil {
- a = t.index.newEntry(anchor, nil)
- a.implicit = true
- a.modified = true
- for _, r := range []rune(anchor) {
- e := t.index.find(string(r))
- e.lock = true
- }
- }
- t.anchor = a
- return nil
-}
-
-// SetAnchor sets the point after which elements passed in subsequent calls to
-// Insert will be inserted. It is equivalent to the reset directive in an LDML
-// specification. See Insert for an example.
-// SetAnchor supports the following logical reset positions:
-// <first_tertiary_ignorable/>, <last_teriary_ignorable/>, <first_primary_ignorable/>,
-// and <last_non_ignorable/>.
-func (t *Tailoring) SetAnchor(anchor string) error {
- if err := t.setAnchor(anchor); err != nil {
- return err
- }
- t.before = false
- return nil
-}
-
-// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
-// Insert will insert entries before the anchor.
-func (t *Tailoring) SetAnchorBefore(anchor string) error {
- if err := t.setAnchor(anchor); err != nil {
- return err
- }
- t.before = true
- return nil
-}
-
-// Insert sets the ordering of str relative to the entry set by the previous
-// call to SetAnchor or Insert. The argument extend corresponds
-// to the extend elements as defined in LDML. A non-empty value for extend
-// will cause the collation elements corresponding to extend to be appended
-// to the collation elements generated for the entry added by Insert.
-// This has the same net effect as sorting str after the string anchor+extend.
-// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details
-// on parametric tailoring and http://unicode.org/reports/tr35/#Collation_Elements
-// for full details on LDML.
-//
-// Examples: create a tailoring for Swedish, where "ä" is ordered after "z"
-// at the primary sorting level:
-// t := b.Tailoring("se")
-// t.SetAnchor("z")
-// t.Insert(colltab.Primary, "ä", "")
-// Order "ü" after "ue" at the secondary sorting level:
-// t.SetAnchor("ue")
-// t.Insert(colltab.Secondary, "ü","")
-// or
-// t.SetAnchor("u")
-// t.Insert(colltab.Secondary, "ü", "e")
-// Order "q" afer "ab" at the secondary level and "Q" after "q"
-// at the tertiary level:
-// t.SetAnchor("ab")
-// t.Insert(colltab.Secondary, "q", "")
-// t.Insert(colltab.Tertiary, "Q", "")
-// Order "b" before "a":
-// t.SetAnchorBefore("a")
-// t.Insert(colltab.Primary, "b", "")
-// Order "0" after the last primary ignorable:
-// t.SetAnchor("<last_primary_ignorable/>")
-// t.Insert(colltab.Primary, "0", "")
-func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
- if t.anchor == nil {
- return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
- }
- str = norm.NFC.String(str)
- e := t.index.find(str)
- if e == nil {
- e = t.index.newEntry(str, nil)
- } else if e.logical != noAnchor {
- return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
- }
- if e.lock {
- return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
- }
- a := t.anchor
- // Find the first element after the anchor which differs at a level smaller or
- // equal to the given level. Then insert at this position.
- // See http://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details.
- e.before = t.before
- if t.before {
- t.before = false
- if a.prev == nil {
- a.insertBefore(e)
- } else {
- for a = a.prev; a.level > level; a = a.prev {
- }
- a.insertAfter(e)
- }
- e.level = level
- } else {
- for ; a.level > level; a = a.next {
- }
- e.level = a.level
- if a != e {
- a.insertAfter(e)
- a.level = level
- } else {
- // We don't set a to prev itself. This has the effect of the entry
- // getting new collation elements that are an increment of itself.
- // This is intentional.
- a.prev.level = level
- }
- }
- e.extend = norm.NFD.String(extend)
- e.exclude = false
- e.modified = true
- e.elems = nil
- t.anchor = e
- return nil
-}
-
-func (o *ordering) getWeight(e *entry) []rawCE {
- if len(e.elems) == 0 && e.logical == noAnchor {
- if e.implicit {
- for _, r := range e.runes {
- e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
- }
- } else if e.before {
- count := [colltab.Identity + 1]int{}
- a := e
- for ; a.elems == nil && !a.implicit; a = a.next {
- count[a.level]++
- }
- e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
- for i := colltab.Primary; i < colltab.Quaternary; i++ {
- if count[i] != 0 {
- e.elems[0].w[i] -= count[i]
- break
- }
- }
- if e.prev != nil {
- o.verifyWeights(e.prev, e, e.prev.level)
- }
- } else {
- prev := e.prev
- e.elems = nextWeight(prev.level, o.getWeight(prev))
- o.verifyWeights(e, e.next, e.level)
- }
- }
- return e.elems
-}
-
-func (o *ordering) addExtension(e *entry) {
- if ex := o.find(e.extend); ex != nil {
- e.elems = append(e.elems, ex.elems...)
- } else {
- for _, r := range []rune(e.extend) {
- e.elems = append(e.elems, o.find(string(r)).elems...)
- }
- }
- e.extend = ""
-}
-
-func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
- if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
- return nil
- }
- for i := colltab.Primary; i < level; i++ {
- if a.elems[0].w[i] < b.elems[0].w[i] {
- return nil
- }
- }
- if a.elems[0].w[level] >= b.elems[0].w[level] {
- err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
- log.Println(err)
- // TODO: return the error instead, or better, fix the conflicting entry by making room.
- }
- return nil
-}
-
-func (b *Builder) error(e error) {
- if e != nil {
- b.err = e
- }
-}
-
-func (b *Builder) errorID(locale string, e error) {
- if e != nil {
- b.err = fmt.Errorf("%s:%v", locale, e)
- }
-}
-
-// patchNorm ensures that NFC and NFD counterparts are consistent.
-func (o *ordering) patchNorm() {
- // Insert the NFD counterparts, if necessary.
- for _, e := range o.ordered {
- nfd := norm.NFD.String(e.str)
- if nfd != e.str {
- if e0 := o.find(nfd); e0 != nil && !e0.modified {
- e0.elems = e.elems
- } else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
- e := o.newEntry(nfd, e.elems)
- e.modified = true
- }
- }
- }
- // Update unchanged composed forms if one of their parts changed.
- for _, e := range o.ordered {
- nfd := norm.NFD.String(e.str)
- if e.modified || nfd == e.str {
- continue
- }
- if e0 := o.find(nfd); e0 != nil {
- e.elems = e0.elems
- } else {
- e.elems = o.genColElems(nfd)
- if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
- r := []rune(nfd)
- head := string(r[0])
- tail := ""
- for i := 1; i < len(r); i++ {
- s := norm.NFC.String(head + string(r[i]))
- if e0 := o.find(s); e0 != nil && e0.modified {
- head = s
- } else {
- tail += string(r[i])
- }
- }
- e.elems = append(o.genColElems(head), o.genColElems(tail)...)
- }
- }
- }
- // Exclude entries for which the individual runes generate the same collation elements.
- for _, e := range o.ordered {
- if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
- e.exclude = true
- }
- }
-}
-
-func (b *Builder) buildOrdering(o *ordering) {
- for _, e := range o.ordered {
- o.getWeight(e)
- }
- for _, e := range o.ordered {
- o.addExtension(e)
- }
- o.patchNorm()
- o.sort()
- simplify(o)
- b.processExpansions(o) // requires simplify
- b.processContractions(o) // requires simplify
-
- t := newNode()
- for e := o.front(); e != nil; e, _ = e.nextIndexed() {
- if !e.skip() {
- ce, err := e.encode()
- b.errorID(o.id, err)
- t.insert(e.runes[0], ce)
- }
- }
- o.handle = b.index.addTrie(t)
-}
-
-func (b *Builder) build() (*table, error) {
- if b.built {
- return b.t, b.err
- }
- b.built = true
- b.t = &table{
- Table: colltab.Table{
- MaxContractLen: utf8.UTFMax,
- VariableTop: uint32(b.varTop),
- },
- }
-
- b.buildOrdering(&b.root)
- b.t.root = b.root.handle
- for _, t := range b.locale {
- b.buildOrdering(t.index)
- if b.err != nil {
- break
- }
- }
- i, err := b.index.generate()
- b.t.trie = *i
- b.t.Index = colltab.Trie{
- Index: i.index,
- Values: i.values,
- Index0: i.index[blockSize*b.t.root.lookupStart:],
- Values0: i.values[blockSize*b.t.root.valueStart:],
- }
- b.error(err)
- return b.t, b.err
-}
-
-// Build builds the root Collator.
-func (b *Builder) Build() (colltab.Weighter, error) {
- table, err := b.build()
- if err != nil {
- return nil, err
- }
- return table, nil
-}
-
-// Build builds a Collator for Tailoring t.
-func (t *Tailoring) Build() (colltab.Weighter, error) {
- // TODO: implement.
- return nil, nil
-}
-
-// Print prints the tables for b and all its Tailorings as a Go file
-// that can be included in the Collate package.
-func (b *Builder) Print(w io.Writer) (n int, err error) {
- p := func(nn int, e error) {
- n += nn
- if err == nil {
- err = e
- }
- }
- t, err := b.build()
- if err != nil {
- return 0, err
- }
- p(fmt.Fprintf(w, `var availableLocales = "und`))
- for _, loc := range b.locale {
- if loc.id != "und" {
- p(fmt.Fprintf(w, ",%s", loc.id))
- }
- }
- p(fmt.Fprint(w, "\"\n\n"))
- p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
- p(fmt.Fprintln(w, "var locales = [...]tableIndex{"))
- for _, loc := range b.locale {
- if loc.id == "und" {
- p(t.fprintIndex(w, loc.index.handle, loc.id))
- }
- }
- for _, loc := range b.locale {
- if loc.id != "und" {
- p(t.fprintIndex(w, loc.index.handle, loc.id))
- }
- }
- p(fmt.Fprint(w, "}\n\n"))
- n, _, err = t.fprint(w, "main")
- return
-}
-
-// reproducibleFromNFKD checks whether the given expansion could be generated
-// from an NFKD expansion.
-func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
- // Length must be equal.
- if len(exp) != len(nfkd) {
- return false
- }
- for i, ce := range exp {
- // Primary and secondary values should be equal.
- if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
- return false
- }
- // Tertiary values should be equal to maxTertiary for third element onwards.
- // TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
- // simply be dropped. Try this out by dropping the following code.
- if i >= 2 && ce.w[2] != maxTertiary {
- return false
- }
- if _, err := makeCE(ce); err != nil {
- // Simply return false. The error will be caught elsewhere.
- return false
- }
- }
- return true
-}
-
-func simplify(o *ordering) {
- // Runes that are a starter of a contraction should not be removed.
- // (To date, there is only Kannada character 0CCA.)
- keep := make(map[rune]bool)
- for e := o.front(); e != nil; e, _ = e.nextIndexed() {
- if len(e.runes) > 1 {
- keep[e.runes[0]] = true
- }
- }
- // Tag entries for which the runes NFKD decompose to identical values.
- for e := o.front(); e != nil; e, _ = e.nextIndexed() {
- s := e.str
- nfkd := norm.NFKD.String(s)
- nfd := norm.NFD.String(s)
- if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
- continue
- }
- if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
- e.decompose = true
- }
- }
-}
-
-// appendExpansion converts the given collation sequence to
-// collation elements and adds them to the expansion table.
-// It returns an index to the expansion table.
-func (b *Builder) appendExpansion(e *entry) int {
- t := b.t
- i := len(t.ExpandElem)
- ce := uint32(len(e.elems))
- t.ExpandElem = append(t.ExpandElem, ce)
- for _, w := range e.elems {
- ce, err := makeCE(w)
- if err != nil {
- b.error(err)
- return -1
- }
- t.ExpandElem = append(t.ExpandElem, ce)
- }
- return i
-}
-
-// processExpansions extracts data necessary to generate
-// the extraction tables.
-func (b *Builder) processExpansions(o *ordering) {
- for e := o.front(); e != nil; e, _ = e.nextIndexed() {
- if !e.expansion() {
- continue
- }
- key := fmt.Sprintf("%v", e.elems)
- i, ok := b.expIndex[key]
- if !ok {
- i = b.appendExpansion(e)
- b.expIndex[key] = i
- }
- e.expansionIndex = i
- }
-}
-
-func (b *Builder) processContractions(o *ordering) {
- // Collate contractions per starter rune.
- starters := []rune{}
- cm := make(map[rune][]*entry)
- for e := o.front(); e != nil; e, _ = e.nextIndexed() {
- if e.contraction() {
- if len(e.str) > b.t.MaxContractLen {
- b.t.MaxContractLen = len(e.str)
- }
- r := e.runes[0]
- if _, ok := cm[r]; !ok {
- starters = append(starters, r)
- }
- cm[r] = append(cm[r], e)
- }
- }
- // Add entries of single runes that are at a start of a contraction.
- for e := o.front(); e != nil; e, _ = e.nextIndexed() {
- if !e.contraction() {
- r := e.runes[0]
- if _, ok := cm[r]; ok {
- cm[r] = append(cm[r], e)
- }
- }
- }
- // Build the tries for the contractions.
- t := b.t
- for _, r := range starters {
- l := cm[r]
- // Compute suffix strings. There are 31 different contraction suffix
- // sets for 715 contractions and 82 contraction starter runes as of
- // version 6.0.0.
- sufx := []string{}
- hasSingle := false
- for _, e := range l {
- if len(e.runes) > 1 {
- sufx = append(sufx, string(e.runes[1:]))
- } else {
- hasSingle = true
- }
- }
- if !hasSingle {
- b.error(fmt.Errorf("no single entry for starter rune %U found", r))
- continue
- }
- // Unique the suffix set.
- sort.Strings(sufx)
- key := strings.Join(sufx, "\n")
- handle, ok := b.ctHandle[key]
- if !ok {
- var err error
- handle, err = appendTrie(&t.ContractTries, sufx)
- if err != nil {
- b.error(err)
- }
- b.ctHandle[key] = handle
- }
- // Bucket sort entries in index order.
- es := make([]*entry, len(l))
- for _, e := range l {
- var p, sn int
- if len(e.runes) > 1 {
- str := []byte(string(e.runes[1:]))
- p, sn = lookup(&t.ContractTries, handle, str)
- if sn != len(str) {
- log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
- }
- }
- if es[p] != nil {
- log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
- }
- es[p] = e
- }
- // Create collation elements for contractions.
- elems := []uint32{}
- for _, e := range es {
- ce, err := e.encodeBase()
- b.errorID(o.id, err)
- elems = append(elems, ce)
- }
- key = fmt.Sprintf("%v", elems)
- i, ok := b.ctElem[key]
- if !ok {
- i = len(t.ContractElem)
- b.ctElem[key] = i
- t.ContractElem = append(t.ContractElem, elems...)
- }
- // Store info in entry for starter rune.
- es[0].contractionIndex = i
- es[0].contractionHandle = handle
- }
-}
diff --git a/vendor/golang.org/x/text/collate/build/colelem.go b/vendor/golang.org/x/text/collate/build/colelem.go
deleted file mode 100644
index 726fe54..0000000
--- a/vendor/golang.org/x/text/collate/build/colelem.go
+++ /dev/null
@@ -1,294 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package build
-
-import (
- "fmt"
- "unicode"
-
- "golang.org/x/text/internal/colltab"
-)
-
-const (
- defaultSecondary = 0x20
- defaultTertiary = 0x2
- maxTertiary = 0x1F
-)
-
-type rawCE struct {
- w []int
- ccc uint8
-}
-
-func makeRawCE(w []int, ccc uint8) rawCE {
- ce := rawCE{w: make([]int, 4), ccc: ccc}
- copy(ce.w, w)
- return ce
-}
-
-// A collation element is represented as an uint32.
-// In the typical case, a rune maps to a single collation element. If a rune
-// can be the start of a contraction or expands into multiple collation elements,
-// then the collation element that is associated with a rune will have a special
-// form to represent such m to n mappings. Such special collation elements
-// have a value >= 0x80000000.
-
-const (
- maxPrimaryBits = 21
- maxSecondaryBits = 12
- maxTertiaryBits = 8
-)
-
-func makeCE(ce rawCE) (uint32, error) {
- v, e := colltab.MakeElem(ce.w[0], ce.w[1], ce.w[2], ce.ccc)
- return uint32(v), e
-}
-
-// For contractions, collation elements are of the form
-// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
-// - n* is the size of the first node in the contraction trie.
-// - i* is the index of the first node in the contraction trie.
-// - b* is the offset into the contraction collation element table.
-// See contract.go for details on the contraction trie.
-const (
- contractID = 0xC0000000
- maxNBits = 4
- maxTrieIndexBits = 12
- maxContractOffsetBits = 13
-)
-
-func makeContractIndex(h ctHandle, offset int) (uint32, error) {
- if h.n >= 1<<maxNBits {
- return 0, fmt.Errorf("size of contraction trie node too large: %d >= %d", h.n, 1<<maxNBits)
- }
- if h.index >= 1<<maxTrieIndexBits {
- return 0, fmt.Errorf("size of contraction trie offset too large: %d >= %d", h.index, 1<<maxTrieIndexBits)
- }
- if offset >= 1<<maxContractOffsetBits {
- return 0, fmt.Errorf("contraction offset out of bounds: %x >= %x", offset, 1<<maxContractOffsetBits)
- }
- ce := uint32(contractID)
- ce += uint32(offset << (maxNBits + maxTrieIndexBits))
- ce += uint32(h.index << maxNBits)
- ce += uint32(h.n)
- return ce, nil
-}
-
-// For expansions, collation elements are of the form
-// 11100000 00000000 bbbbbbbb bbbbbbbb,
-// where b* is the index into the expansion sequence table.
-const (
- expandID = 0xE0000000
- maxExpandIndexBits = 16
-)
-
-func makeExpandIndex(index int) (uint32, error) {
- if index >= 1<<maxExpandIndexBits {
- return 0, fmt.Errorf("expansion index out of bounds: %x >= %x", index, 1<<maxExpandIndexBits)
- }
- return expandID + uint32(index), nil
-}
-
-// Each list of collation elements corresponding to an expansion starts with
-// a header indicating the length of the sequence.
-func makeExpansionHeader(n int) (uint32, error) {
- return uint32(n), nil
-}
-
-// Some runes can be expanded using NFKD decomposition. Instead of storing the full
-// sequence of collation elements, we decompose the rune and lookup the collation
-// elements for each rune in the decomposition and modify the tertiary weights.
-// The collation element, in this case, is of the form
-// 11110000 00000000 wwwwwwww vvvvvvvv, where
-// - v* is the replacement tertiary weight for the first rune,
-// - w* is the replacement tertiary weight for the second rune,
-// Tertiary weights of subsequent runes should be replaced with maxTertiary.
-// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
-const (
- decompID = 0xF0000000
-)
-
-func makeDecompose(t1, t2 int) (uint32, error) {
- if t1 >= 256 || t1 < 0 {
- return 0, fmt.Errorf("first tertiary weight out of bounds: %d >= 256", t1)
- }
- if t2 >= 256 || t2 < 0 {
- return 0, fmt.Errorf("second tertiary weight out of bounds: %d >= 256", t2)
- }
- return uint32(t2<<8+t1) + decompID, nil
-}
-
-const (
- // These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
- minUnified rune = 0x4E00
- maxUnified = 0x9FFF
- minCompatibility = 0xF900
- maxCompatibility = 0xFAFF
- minRare = 0x3400
- maxRare = 0x4DBF
-)
-const (
- commonUnifiedOffset = 0x10000
- rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
- otherOffset = 0x50000 // largest rune in rare is U+2FA1D
- illegalOffset = otherOffset + int(unicode.MaxRune)
- maxPrimary = illegalOffset + 1
-)
-
-// implicitPrimary returns the primary weight for the a rune
-// for which there is no entry for the rune in the collation table.
-// We take a different approach from the one specified in
-// http://unicode.org/reports/tr10/#Implicit_Weights,
-// but preserve the resulting relative ordering of the runes.
-func implicitPrimary(r rune) int {
- if unicode.Is(unicode.Ideographic, r) {
- if r >= minUnified && r <= maxUnified {
- // The most common case for CJK.
- return int(r) + commonUnifiedOffset
- }
- if r >= minCompatibility && r <= maxCompatibility {
- // This will typically not hit. The DUCET explicitly specifies mappings
- // for all characters that do not decompose.
- return int(r) + commonUnifiedOffset
- }
- return int(r) + rareUnifiedOffset
- }
- return int(r) + otherOffset
-}
-
-// convertLargeWeights converts collation elements with large
-// primaries (either double primaries or for illegal runes)
-// to our own representation.
-// A CJK character C is represented in the DUCET as
-// [.FBxx.0020.0002.C][.BBBB.0000.0000.C]
-// We will rewrite these characters to a single CE.
-// We assume the CJK values start at 0x8000.
-// See http://unicode.org/reports/tr10/#Implicit_Weights
-func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
- const (
- cjkPrimaryStart = 0xFB40
- rarePrimaryStart = 0xFB80
- otherPrimaryStart = 0xFBC0
- illegalPrimary = 0xFFFE
- highBitsMask = 0x3F
- lowBitsMask = 0x7FFF
- lowBitsFlag = 0x8000
- shiftBits = 15
- )
- for i := 0; i < len(elems); i++ {
- ce := elems[i].w
- p := ce[0]
- if p < cjkPrimaryStart {
- continue
- }
- if p > 0xFFFF {
- return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p)
- }
- if p >= illegalPrimary {
- ce[0] = illegalOffset + p - illegalPrimary
- } else {
- if i+1 >= len(elems) {
- return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
- }
- if elems[i+1].w[0]&lowBitsFlag == 0 {
- return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
- }
- np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
- switch {
- case p < rarePrimaryStart:
- np += commonUnifiedOffset
- case p < otherPrimaryStart:
- np += rareUnifiedOffset
- default:
- p += otherOffset
- }
- ce[0] = np
- for j := i + 1; j+1 < len(elems); j++ {
- elems[j] = elems[j+1]
- }
- elems = elems[:len(elems)-1]
- }
- }
- return elems, nil
-}
-
-// nextWeight computes the first possible collation weights following elems
-// for the given level.
-func nextWeight(level colltab.Level, elems []rawCE) []rawCE {
- if level == colltab.Identity {
- next := make([]rawCE, len(elems))
- copy(next, elems)
- return next
- }
- next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
- next[0].w[level]++
- if level < colltab.Secondary {
- next[0].w[colltab.Secondary] = defaultSecondary
- }
- if level < colltab.Tertiary {
- next[0].w[colltab.Tertiary] = defaultTertiary
- }
- // Filter entries that cannot influence ordering.
- for _, ce := range elems[1:] {
- skip := true
- for i := colltab.Primary; i < level; i++ {
- skip = skip && ce.w[i] == 0
- }
- if !skip {
- next = append(next, ce)
- }
- }
- return next
-}
-
-func nextVal(elems []rawCE, i int, level colltab.Level) (index, value int) {
- for ; i < len(elems) && elems[i].w[level] == 0; i++ {
- }
- if i < len(elems) {
- return i, elems[i].w[level]
- }
- return i, 0
-}
-
-// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
-// It also returns the collation level at which the difference is found.
-func compareWeights(a, b []rawCE) (result int, level colltab.Level) {
- for level := colltab.Primary; level < colltab.Identity; level++ {
- var va, vb int
- for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
- ia, va = nextVal(a, ia, level)
- ib, vb = nextVal(b, ib, level)
- if va != vb {
- if va < vb {
- return -1, level
- } else {
- return 1, level
- }
- }
- }
- }
- return 0, colltab.Identity
-}
-
-func equalCE(a, b rawCE) bool {
- for i := 0; i < 3; i++ {
- if b.w[i] != a.w[i] {
- return false
- }
- }
- return true
-}
-
-func equalCEArrays(a, b []rawCE) bool {
- if len(a) != len(b) {
- return false
- }
- for i := range a {
- if !equalCE(a[i], b[i]) {
- return false
- }
- }
- return true
-}
diff --git a/vendor/golang.org/x/text/collate/build/contract.go b/vendor/golang.org/x/text/collate/build/contract.go
deleted file mode 100644
index a6a7e01..0000000
--- a/vendor/golang.org/x/text/collate/build/contract.go
+++ /dev/null
@@ -1,309 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package build
-
-import (
- "fmt"
- "io"
- "reflect"
- "sort"
- "strings"
-
- "golang.org/x/text/internal/colltab"
-)
-
-// This file contains code for detecting contractions and generating
-// the necessary tables.
-// Any Unicode Collation Algorithm (UCA) table entry that has more than
-// one rune one the left-hand side is called a contraction.
-// See http://www.unicode.org/reports/tr10/#Contractions for more details.
-//
-// We define the following terms:
-// initial: a rune that appears as the first rune in a contraction.
-// suffix: a sequence of runes succeeding the initial rune
-// in a given contraction.
-// non-initial: a rune that appears in a suffix.
-//
-// A rune may be both an initial and a non-initial and may be so in
-// many contractions. An initial may typically also appear by itself.
-// In case of ambiguities, the UCA requires we match the longest
-// contraction.
-//
-// Many contraction rules share the same set of possible suffixes.
-// We store sets of suffixes in a trie that associates an index with
-// each suffix in the set. This index can be used to look up a
-// collation element associated with the (starter rune, suffix) pair.
-//
-// The trie is defined on a UTF-8 byte sequence.
-// The overall trie is represented as an array of ctEntries. Each node of the trie
-// is represented as a subsequence of ctEntries, where each entry corresponds to
-// a possible match of a next character in the search string. An entry
-// also includes the length and offset to the next sequence of entries
-// to check in case of a match.
-
-const (
- final = 0
- noIndex = 0xFF
-)
-
-// ctEntry associates to a matching byte an offset and/or next sequence of
-// bytes to check. A ctEntry c is called final if a match means that the
-// longest suffix has been found. An entry c is final if c.N == 0.
-// A single final entry can match a range of characters to an offset.
-// A non-final entry always matches a single byte. Note that a non-final
-// entry might still resemble a completed suffix.
-// Examples:
-// The suffix strings "ab" and "ac" can be represented as:
-// []ctEntry{
-// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF.
-// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2
-// }
-//
-// The suffix strings "ab", "abc", "abd", and "abcd" can be represented as:
-// []ctEntry{
-// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'.
-// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'.
-// {'d', 'd', final, 3}, // "abd" -> 3
-// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'.
-// {'d', 'd', final, 4}, // "abcd" -> 4
-// }
-// See genStateTests in contract_test.go for more examples.
-type ctEntry struct {
- L uint8 // non-final: byte value to match; final: lowest match in range.
- H uint8 // non-final: relative index to next block; final: highest match in range.
- N uint8 // non-final: length of next block; final: final
- I uint8 // result offset. Will be noIndex if more bytes are needed to complete.
-}
-
-// contractTrieSet holds a set of contraction tries. The tries are stored
-// consecutively in the entry field.
-type contractTrieSet []struct{ l, h, n, i uint8 }
-
-// ctHandle is used to identify a trie in the trie set, consisting in an offset
-// in the array and the size of the first node.
-type ctHandle struct {
- index, n int
-}
-
-// appendTrie adds a new trie for the given suffixes to the trie set and returns
-// a handle to it. The handle will be invalid on error.
-func appendTrie(ct *colltab.ContractTrieSet, suffixes []string) (ctHandle, error) {
- es := make([]stridx, len(suffixes))
- for i, s := range suffixes {
- es[i].str = s
- }
- sort.Sort(offsetSort(es))
- for i := range es {
- es[i].index = i + 1
- }
- sort.Sort(genidxSort(es))
- i := len(*ct)
- n, err := genStates(ct, es)
- if err != nil {
- *ct = (*ct)[:i]
- return ctHandle{}, err
- }
- return ctHandle{i, n}, nil
-}
-
-// genStates generates ctEntries for a given suffix set and returns
-// the number of entries for the first node.
-func genStates(ct *colltab.ContractTrieSet, sis []stridx) (int, error) {
- if len(sis) == 0 {
- return 0, fmt.Errorf("genStates: list of suffices must be non-empty")
- }
- start := len(*ct)
- // create entries for differing first bytes.
- for _, si := range sis {
- s := si.str
- if len(s) == 0 {
- continue
- }
- added := false
- c := s[0]
- if len(s) > 1 {
- for j := len(*ct) - 1; j >= start; j-- {
- if (*ct)[j].L == c {
- added = true
- break
- }
- }
- if !added {
- *ct = append(*ct, ctEntry{L: c, I: noIndex})
- }
- } else {
- for j := len(*ct) - 1; j >= start; j-- {
- // Update the offset for longer suffixes with the same byte.
- if (*ct)[j].L == c {
- (*ct)[j].I = uint8(si.index)
- added = true
- }
- // Extend range of final ctEntry, if possible.
- if (*ct)[j].H+1 == c {
- (*ct)[j].H = c
- added = true
- }
- }
- if !added {
- *ct = append(*ct, ctEntry{L: c, H: c, N: final, I: uint8(si.index)})
- }
- }
- }
- n := len(*ct) - start
- // Append nodes for the remainder of the suffixes for each ctEntry.
- sp := 0
- for i, end := start, len(*ct); i < end; i++ {
- fe := (*ct)[i]
- if fe.H == 0 { // uninitialized non-final
- ln := len(*ct) - start - n
- if ln > 0xFF {
- return 0, fmt.Errorf("genStates: relative block offset too large: %d > 255", ln)
- }
- fe.H = uint8(ln)
- // Find first non-final strings with same byte as current entry.
- for ; sis[sp].str[0] != fe.L; sp++ {
- }
- se := sp + 1
- for ; se < len(sis) && len(sis[se].str) > 1 && sis[se].str[0] == fe.L; se++ {
- }
- sl := sis[sp:se]
- sp = se
- for i, si := range sl {
- sl[i].str = si.str[1:]
- }
- nn, err := genStates(ct, sl)
- if err != nil {
- return 0, err
- }
- fe.N = uint8(nn)
- (*ct)[i] = fe
- }
- }
- sort.Sort(entrySort((*ct)[start : start+n]))
- return n, nil
-}
-
-// There may be both a final and non-final entry for a byte if the byte
-// is implied in a range of matches in the final entry.
-// We need to ensure that the non-final entry comes first in that case.
-type entrySort colltab.ContractTrieSet
-
-func (fe entrySort) Len() int { return len(fe) }
-func (fe entrySort) Swap(i, j int) { fe[i], fe[j] = fe[j], fe[i] }
-func (fe entrySort) Less(i, j int) bool {
- return fe[i].L > fe[j].L
-}
-
-// stridx is used for sorting suffixes and their associated offsets.
-type stridx struct {
- str string
- index int
-}
-
-// For computing the offsets, we first sort by size, and then by string.
-// This ensures that strings that only differ in the last byte by 1
-// are sorted consecutively in increasing order such that they can
-// be packed as a range in a final ctEntry.
-type offsetSort []stridx
-
-func (si offsetSort) Len() int { return len(si) }
-func (si offsetSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
-func (si offsetSort) Less(i, j int) bool {
- if len(si[i].str) != len(si[j].str) {
- return len(si[i].str) > len(si[j].str)
- }
- return si[i].str < si[j].str
-}
-
-// For indexing, we want to ensure that strings are sorted in string order, where
-// for strings with the same prefix, we put longer strings before shorter ones.
-type genidxSort []stridx
-
-func (si genidxSort) Len() int { return len(si) }
-func (si genidxSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
-func (si genidxSort) Less(i, j int) bool {
- if strings.HasPrefix(si[j].str, si[i].str) {
- return false
- }
- if strings.HasPrefix(si[i].str, si[j].str) {
- return true
- }
- return si[i].str < si[j].str
-}
-
-// lookup matches the longest suffix in str and returns the associated offset
-// and the number of bytes consumed.
-func lookup(ct *colltab.ContractTrieSet, h ctHandle, str []byte) (index, ns int) {
- states := (*ct)[h.index:]
- p := 0
- n := h.n
- for i := 0; i < n && p < len(str); {
- e := states[i]
- c := str[p]
- if c >= e.L {
- if e.L == c {
- p++
- if e.I != noIndex {
- index, ns = int(e.I), p
- }
- if e.N != final {
- // set to new state
- i, states, n = 0, states[int(e.H)+n:], int(e.N)
- } else {
- return
- }
- continue
- } else if e.N == final && c <= e.H {
- p++
- return int(c-e.L) + int(e.I), p
- }
- }
- i++
- }
- return
-}
-
-// print writes the contractTrieSet t as compilable Go code to w. It returns
-// the total number of bytes written and the size of the resulting data structure in bytes.
-func print(t *colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
- update3 := func(nn, sz int, e error) {
- n += nn
- if err == nil {
- err = e
- }
- size += sz
- }
- update2 := func(nn int, e error) { update3(nn, 0, e) }
-
- update3(printArray(*t, w, name))
- update2(fmt.Fprintf(w, "var %sContractTrieSet = ", name))
- update3(printStruct(*t, w, name))
- update2(fmt.Fprintln(w))
- return
-}
-
-func printArray(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
- p := func(f string, a ...interface{}) {
- nn, e := fmt.Fprintf(w, f, a...)
- n += nn
- if err == nil {
- err = e
- }
- }
- size = len(ct) * 4
- p("// %sCTEntries: %d entries, %d bytes\n", name, len(ct), size)
- p("var %sCTEntries = [%d]struct{L,H,N,I uint8}{\n", name, len(ct))
- for _, fe := range ct {
- p("\t{0x%X, 0x%X, %d, %d},\n", fe.L, fe.H, fe.N, fe.I)
- }
- p("}\n")
- return
-}
-
-func printStruct(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
- n, err = fmt.Fprintf(w, "colltab.ContractTrieSet( %sCTEntries[:] )", name)
- size = int(reflect.TypeOf(ct).Size())
- return
-}
diff --git a/vendor/golang.org/x/text/collate/build/order.go b/vendor/golang.org/x/text/collate/build/order.go
deleted file mode 100644
index 2c568db..0000000
--- a/vendor/golang.org/x/text/collate/build/order.go
+++ /dev/null
@@ -1,393 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package build
-
-import (
- "fmt"
- "log"
- "sort"
- "strings"
- "unicode"
-
- "golang.org/x/text/internal/colltab"
- "golang.org/x/text/unicode/norm"
-)
-
-type logicalAnchor int
-
-const (
- firstAnchor logicalAnchor = -1
- noAnchor = 0
- lastAnchor = 1
-)
-
-// entry is used to keep track of a single entry in the collation element table
-// during building. Examples of entries can be found in the Default Unicode
-// Collation Element Table.
-// See http://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
-type entry struct {
- str string // same as string(runes)
- runes []rune
- elems []rawCE // the collation elements
- extend string // weights of extend to be appended to elems
- before bool // weights relative to next instead of previous.
- lock bool // entry is used in extension and can no longer be moved.
-
- // prev, next, and level are used to keep track of tailorings.
- prev, next *entry
- level colltab.Level // next differs at this level
- skipRemove bool // do not unlink when removed
-
- decompose bool // can use NFKD decomposition to generate elems
- exclude bool // do not include in table
- implicit bool // derived, is not included in the list
- modified bool // entry was modified in tailoring
- logical logicalAnchor
-
- expansionIndex int // used to store index into expansion table
- contractionHandle ctHandle
- contractionIndex int // index into contraction elements
-}
-
-func (e *entry) String() string {
- return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
- e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
-}
-
-func (e *entry) skip() bool {
- return e.contraction()
-}
-
-func (e *entry) expansion() bool {
- return !e.decompose && len(e.elems) > 1
-}
-
-func (e *entry) contraction() bool {
- return len(e.runes) > 1
-}
-
-func (e *entry) contractionStarter() bool {
- return e.contractionHandle.n != 0
-}
-
-// nextIndexed gets the next entry that needs to be stored in the table.
-// It returns the entry and the collation level at which the next entry differs
-// from the current entry.
-// Entries that can be explicitly derived and logical reset positions are
-// examples of entries that will not be indexed.
-func (e *entry) nextIndexed() (*entry, colltab.Level) {
- level := e.level
- for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
- if e.level < level {
- level = e.level
- }
- }
- return e, level
-}
-
-// remove unlinks entry e from the sorted chain and clears the collation
-// elements. e may not be at the front or end of the list. This should always
-// be the case, as the front and end of the list are always logical anchors,
-// which may not be removed.
-func (e *entry) remove() {
- if e.logical != noAnchor {
- log.Fatalf("may not remove anchor %q", e.str)
- }
- // TODO: need to set e.prev.level to e.level if e.level is smaller?
- e.elems = nil
- if !e.skipRemove {
- if e.prev != nil {
- e.prev.next = e.next
- }
- if e.next != nil {
- e.next.prev = e.prev
- }
- }
- e.skipRemove = false
-}
-
-// insertAfter inserts n after e.
-func (e *entry) insertAfter(n *entry) {
- if e == n {
- panic("e == anchor")
- }
- if e == nil {
- panic("unexpected nil anchor")
- }
- n.remove()
- n.decompose = false // redo decomposition test
-
- n.next = e.next
- n.prev = e
- if e.next != nil {
- e.next.prev = n
- }
- e.next = n
-}
-
-// insertBefore inserts n before e.
-func (e *entry) insertBefore(n *entry) {
- if e == n {
- panic("e == anchor")
- }
- if e == nil {
- panic("unexpected nil anchor")
- }
- n.remove()
- n.decompose = false // redo decomposition test
-
- n.prev = e.prev
- n.next = e
- if e.prev != nil {
- e.prev.next = n
- }
- e.prev = n
-}
-
-func (e *entry) encodeBase() (ce uint32, err error) {
- switch {
- case e.expansion():
- ce, err = makeExpandIndex(e.expansionIndex)
- default:
- if e.decompose {
- log.Fatal("decompose should be handled elsewhere")
- }
- ce, err = makeCE(e.elems[0])
- }
- return
-}
-
-func (e *entry) encode() (ce uint32, err error) {
- if e.skip() {
- log.Fatal("cannot build colElem for entry that should be skipped")
- }
- switch {
- case e.decompose:
- t1 := e.elems[0].w[2]
- t2 := 0
- if len(e.elems) > 1 {
- t2 = e.elems[1].w[2]
- }
- ce, err = makeDecompose(t1, t2)
- case e.contractionStarter():
- ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
- default:
- if len(e.runes) > 1 {
- log.Fatal("colElem: contractions are handled in contraction trie")
- }
- ce, err = e.encodeBase()
- }
- return
-}
-
-// entryLess returns true if a sorts before b and false otherwise.
-func entryLess(a, b *entry) bool {
- if res, _ := compareWeights(a.elems, b.elems); res != 0 {
- return res == -1
- }
- if a.logical != noAnchor {
- return a.logical == firstAnchor
- }
- if b.logical != noAnchor {
- return b.logical == lastAnchor
- }
- return a.str < b.str
-}
-
-type sortedEntries []*entry
-
-func (s sortedEntries) Len() int {
- return len(s)
-}
-
-func (s sortedEntries) Swap(i, j int) {
- s[i], s[j] = s[j], s[i]
-}
-
-func (s sortedEntries) Less(i, j int) bool {
- return entryLess(s[i], s[j])
-}
-
-type ordering struct {
- id string
- entryMap map[string]*entry
- ordered []*entry
- handle *trieHandle
-}
-
-// insert inserts e into both entryMap and ordered.
-// Note that insert simply appends e to ordered. To reattain a sorted
-// order, o.sort() should be called.
-func (o *ordering) insert(e *entry) {
- if e.logical == noAnchor {
- o.entryMap[e.str] = e
- } else {
- // Use key format as used in UCA rules.
- o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
- // Also add index entry for XML format.
- o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
- }
- o.ordered = append(o.ordered, e)
-}
-
-// newEntry creates a new entry for the given info and inserts it into
-// the index.
-func (o *ordering) newEntry(s string, ces []rawCE) *entry {
- e := &entry{
- runes: []rune(s),
- elems: ces,
- str: s,
- }
- o.insert(e)
- return e
-}
-
-// find looks up and returns the entry for the given string.
-// It returns nil if str is not in the index and if an implicit value
-// cannot be derived, that is, if str represents more than one rune.
-func (o *ordering) find(str string) *entry {
- e := o.entryMap[str]
- if e == nil {
- r := []rune(str)
- if len(r) == 1 {
- const (
- firstHangul = 0xAC00
- lastHangul = 0xD7A3
- )
- if r[0] >= firstHangul && r[0] <= lastHangul {
- ce := []rawCE{}
- nfd := norm.NFD.String(str)
- for _, r := range nfd {
- ce = append(ce, o.find(string(r)).elems...)
- }
- e = o.newEntry(nfd, ce)
- } else {
- e = o.newEntry(string(r[0]), []rawCE{
- {w: []int{
- implicitPrimary(r[0]),
- defaultSecondary,
- defaultTertiary,
- int(r[0]),
- },
- },
- })
- e.modified = true
- }
- e.exclude = true // do not index implicits
- }
- }
- return e
-}
-
-// makeRootOrdering returns a newly initialized ordering value and populates
-// it with a set of logical reset points that can be used as anchors.
-// The anchors first_tertiary_ignorable and __END__ will always sort at
-// the beginning and end, respectively. This means that prev and next are non-nil
-// for any indexed entry.
-func makeRootOrdering() ordering {
- const max = unicode.MaxRune
- o := ordering{
- entryMap: make(map[string]*entry),
- }
- insert := func(typ logicalAnchor, s string, ce []int) {
- e := &entry{
- elems: []rawCE{{w: ce}},
- str: s,
- exclude: true,
- logical: typ,
- }
- o.insert(e)
- }
- insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
- insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
- insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
- insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
- insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
- return o
-}
-
-// patchForInsert eleminates entries from the list with more than one collation element.
-// The next and prev fields of the eliminated entries still point to appropriate
-// values in the newly created list.
-// It requires that sort has been called.
-func (o *ordering) patchForInsert() {
- for i := 0; i < len(o.ordered)-1; {
- e := o.ordered[i]
- lev := e.level
- n := e.next
- for ; n != nil && len(n.elems) > 1; n = n.next {
- if n.level < lev {
- lev = n.level
- }
- n.skipRemove = true
- }
- for ; o.ordered[i] != n; i++ {
- o.ordered[i].level = lev
- o.ordered[i].next = n
- o.ordered[i+1].prev = e
- }
- }
-}
-
-// clone copies all ordering of es into a new ordering value.
-func (o *ordering) clone() *ordering {
- o.sort()
- oo := ordering{
- entryMap: make(map[string]*entry),
- }
- for _, e := range o.ordered {
- ne := &entry{
- runes: e.runes,
- elems: e.elems,
- str: e.str,
- decompose: e.decompose,
- exclude: e.exclude,
- logical: e.logical,
- }
- oo.insert(ne)
- }
- oo.sort() // link all ordering.
- oo.patchForInsert()
- return &oo
-}
-
-// front returns the first entry to be indexed.
-// It assumes that sort() has been called.
-func (o *ordering) front() *entry {
- e := o.ordered[0]
- if e.prev != nil {
- log.Panicf("unexpected first entry: %v", e)
- }
- // The first entry is always a logical position, which should not be indexed.
- e, _ = e.nextIndexed()
- return e
-}
-
-// sort sorts all ordering based on their collation elements and initializes
-// the prev, next, and level fields accordingly.
-func (o *ordering) sort() {
- sort.Sort(sortedEntries(o.ordered))
- l := o.ordered
- for i := 1; i < len(l); i++ {
- k := i - 1
- l[k].next = l[i]
- _, l[k].level = compareWeights(l[k].elems, l[i].elems)
- l[i].prev = l[k]
- }
-}
-
-// genColElems generates a collation element array from the runes in str. This
-// assumes that all collation elements have already been added to the Builder.
-func (o *ordering) genColElems(str string) []rawCE {
- elems := []rawCE{}
- for _, r := range []rune(str) {
- for _, ce := range o.find(string(r)).elems {
- if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
- elems = append(elems, ce)
- }
- }
- }
- return elems
-}
diff --git a/vendor/golang.org/x/text/collate/build/table.go b/vendor/golang.org/x/text/collate/build/table.go
deleted file mode 100644
index 7eea7a6..0000000
--- a/vendor/golang.org/x/text/collate/build/table.go
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package build
-
-import (
- "fmt"
- "io"
- "reflect"
-
- "golang.org/x/text/internal/colltab"
-)
-
-// table is an intermediate structure that roughly resembles the table in collate.
-type table struct {
- colltab.Table
- trie trie
- root *trieHandle
-}
-
-// print writes the table as Go compilable code to w. It prefixes the
-// variable names with name. It returns the number of bytes written
-// and the size of the resulting table.
-func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
- update := func(nn, sz int, e error) {
- n += nn
- if err == nil {
- err = e
- }
- size += sz
- }
- // Write arrays needed for the structure.
- update(printColElems(w, t.ExpandElem, name+"ExpandElem"))
- update(printColElems(w, t.ContractElem, name+"ContractElem"))
- update(t.trie.printArrays(w, name))
- update(printArray(t.ContractTries, w, name))
-
- nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size)
- update(nn, 0, e)
- return
-}
-
-func (t *table) fprintIndex(w io.Writer, h *trieHandle, id string) (n int, err error) {
- p := func(f string, a ...interface{}) {
- nn, e := fmt.Fprintf(w, f, a...)
- n += nn
- if err == nil {
- err = e
- }
- }
- p("\t{ // %s\n", id)
- p("\t\tlookupOffset: 0x%x,\n", h.lookupStart)
- p("\t\tvaluesOffset: 0x%x,\n", h.valueStart)
- p("\t},\n")
- return
-}
-
-func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) {
- p := func(f string, a ...interface{}) {
- nn, e := fmt.Fprintf(w, f, a...)
- n += nn
- if err == nil {
- err = e
- }
- }
- sz = len(a) * int(reflect.TypeOf(uint32(0)).Size())
- p("// %s: %d entries, %d bytes\n", name, len(a), sz)
- p("var %s = [%d]uint32 {", name, len(a))
- for i, c := range a {
- switch {
- case i%64 == 0:
- p("\n\t// Block %d, offset 0x%x\n", i/64, i)
- case (i%64)%6 == 0:
- p("\n\t")
- }
- p("0x%.8X, ", c)
- }
- p("\n}\n\n")
- return
-}
diff --git a/vendor/golang.org/x/text/collate/build/trie.go b/vendor/golang.org/x/text/collate/build/trie.go
deleted file mode 100644
index 9404a34..0000000
--- a/vendor/golang.org/x/text/collate/build/trie.go
+++ /dev/null
@@ -1,290 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// The trie in this file is used to associate the first full character
-// in a UTF-8 string to a collation element.
-// All but the last byte in a UTF-8 byte sequence are
-// used to look up offsets in the index table to be used for the next byte.
-// The last byte is used to index into a table of collation elements.
-// This file contains the code for the generation of the trie.
-
-package build
-
-import (
- "fmt"
- "hash/fnv"
- "io"
- "reflect"
-)
-
-const (
- blockSize = 64
- blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes.
-)
-
-type trieHandle struct {
- lookupStart uint16 // offset in table for first byte
- valueStart uint16 // offset in table for first byte
-}
-
-type trie struct {
- index []uint16
- values []uint32
-}
-
-// trieNode is the intermediate trie structure used for generating a trie.
-type trieNode struct {
- index []*trieNode
- value []uint32
- b byte
- refValue uint16
- refIndex uint16
-}
-
-func newNode() *trieNode {
- return &trieNode{
- index: make([]*trieNode, 64),
- value: make([]uint32, 128), // root node size is 128 instead of 64
- }
-}
-
-func (n *trieNode) isInternal() bool {
- return n.value != nil
-}
-
-func (n *trieNode) insert(r rune, value uint32) {
- const maskx = 0x3F // mask out two most-significant bits
- str := string(r)
- if len(str) == 1 {
- n.value[str[0]] = value
- return
- }
- for i := 0; i < len(str)-1; i++ {
- b := str[i] & maskx
- if n.index == nil {
- n.index = make([]*trieNode, blockSize)
- }
- nn := n.index[b]
- if nn == nil {
- nn = &trieNode{}
- nn.b = b
- n.index[b] = nn
- }
- n = nn
- }
- if n.value == nil {
- n.value = make([]uint32, blockSize)
- }
- b := str[len(str)-1] & maskx
- n.value[b] = value
-}
-
-type trieBuilder struct {
- t *trie
-
- roots []*trieHandle
-
- lookupBlocks []*trieNode
- valueBlocks []*trieNode
-
- lookupBlockIdx map[uint32]*trieNode
- valueBlockIdx map[uint32]*trieNode
-}
-
-func newTrieBuilder() *trieBuilder {
- index := &trieBuilder{}
- index.lookupBlocks = make([]*trieNode, 0)
- index.valueBlocks = make([]*trieNode, 0)
- index.lookupBlockIdx = make(map[uint32]*trieNode)
- index.valueBlockIdx = make(map[uint32]*trieNode)
- // The third nil is the default null block. The other two blocks
- // are used to guarantee an offset of at least 3 for each block.
- index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil)
- index.t = &trie{}
- return index
-}
-
-func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode {
- hasher := fnv.New32()
- if n.index != nil {
- for i, nn := range n.index {
- var vi, vv uint16
- if nn != nil {
- nn = b.computeOffsets(nn)
- n.index[i] = nn
- vi = nn.refIndex
- vv = nn.refValue
- }
- hasher.Write([]byte{byte(vi >> 8), byte(vi)})
- hasher.Write([]byte{byte(vv >> 8), byte(vv)})
- }
- h := hasher.Sum32()
- nn, ok := b.lookupBlockIdx[h]
- if !ok {
- n.refIndex = uint16(len(b.lookupBlocks)) - blockOffset
- b.lookupBlocks = append(b.lookupBlocks, n)
- b.lookupBlockIdx[h] = n
- } else {
- n = nn
- }
- } else {
- for _, v := range n.value {
- hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
- }
- h := hasher.Sum32()
- nn, ok := b.valueBlockIdx[h]
- if !ok {
- n.refValue = uint16(len(b.valueBlocks)) - blockOffset
- n.refIndex = n.refValue
- b.valueBlocks = append(b.valueBlocks, n)
- b.valueBlockIdx[h] = n
- } else {
- n = nn
- }
- }
- return n
-}
-
-func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 {
- hasher := fnv.New32()
- for _, v := range n.value[:2*blockSize] {
- hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
- }
- h := hasher.Sum32()
- nn, ok := b.valueBlockIdx[h]
- if !ok {
- n.refValue = uint16(len(b.valueBlocks))
- n.refIndex = n.refValue
- b.valueBlocks = append(b.valueBlocks, n)
- // Add a dummy block to accommodate the double block size.
- b.valueBlocks = append(b.valueBlocks, nil)
- b.valueBlockIdx[h] = n
- } else {
- n = nn
- }
- return n.refValue
-}
-
-func genValueBlock(t *trie, n *trieNode) {
- if n != nil {
- for _, v := range n.value {
- t.values = append(t.values, v)
- }
- }
-}
-
-func genLookupBlock(t *trie, n *trieNode) {
- for _, nn := range n.index {
- v := uint16(0)
- if nn != nil {
- if n.index != nil {
- v = nn.refIndex
- } else {
- v = nn.refValue
- }
- }
- t.index = append(t.index, v)
- }
-}
-
-func (b *trieBuilder) addTrie(n *trieNode) *trieHandle {
- h := &trieHandle{}
- b.roots = append(b.roots, h)
- h.valueStart = b.addStartValueBlock(n)
- if len(b.roots) == 1 {
- // We insert a null block after the first start value block.
- // This ensures that continuation bytes UTF-8 sequences of length
- // greater than 2 will automatically hit a null block if there
- // was an undefined entry.
- b.valueBlocks = append(b.valueBlocks, nil)
- }
- n = b.computeOffsets(n)
- // Offset by one extra block as the first byte starts at 0xC0 instead of 0x80.
- h.lookupStart = n.refIndex - 1
- return h
-}
-
-// generate generates and returns the trie for n.
-func (b *trieBuilder) generate() (t *trie, err error) {
- t = b.t
- if len(b.valueBlocks) >= 1<<16 {
- return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16)
- }
- if len(b.lookupBlocks) >= 1<<16 {
- return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16)
- }
- genValueBlock(t, b.valueBlocks[0])
- genValueBlock(t, &trieNode{value: make([]uint32, 64)})
- for i := 2; i < len(b.valueBlocks); i++ {
- genValueBlock(t, b.valueBlocks[i])
- }
- n := &trieNode{index: make([]*trieNode, 64)}
- genLookupBlock(t, n)
- genLookupBlock(t, n)
- genLookupBlock(t, n)
- for i := 3; i < len(b.lookupBlocks); i++ {
- genLookupBlock(t, b.lookupBlocks[i])
- }
- return b.t, nil
-}
-
-func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
- p := func(f string, a ...interface{}) {
- nn, e := fmt.Fprintf(w, f, a...)
- n += nn
- if err == nil {
- err = e
- }
- }
- nv := len(t.values)
- p("// %sValues: %d entries, %d bytes\n", name, nv, nv*4)
- p("// Block 2 is the null block.\n")
- p("var %sValues = [%d]uint32 {", name, nv)
- var printnewline bool
- for i, v := range t.values {
- if i%blockSize == 0 {
- p("\n\t// Block %#x, offset %#x", i/blockSize, i)
- }
- if i%4 == 0 {
- printnewline = true
- }
- if v != 0 {
- if printnewline {
- p("\n\t")
- printnewline = false
- }
- p("%#04x:%#08x, ", i, v)
- }
- }
- p("\n}\n\n")
- ni := len(t.index)
- p("// %sLookup: %d entries, %d bytes\n", name, ni, ni*2)
- p("// Block 0 is the null block.\n")
- p("var %sLookup = [%d]uint16 {", name, ni)
- printnewline = false
- for i, v := range t.index {
- if i%blockSize == 0 {
- p("\n\t// Block %#x, offset %#x", i/blockSize, i)
- }
- if i%8 == 0 {
- printnewline = true
- }
- if v != 0 {
- if printnewline {
- p("\n\t")
- printnewline = false
- }
- p("%#03x:%#02x, ", i, v)
- }
- }
- p("\n}\n\n")
- return n, nv*4 + ni*2, err
-}
-
-func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) {
- const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}"
- n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name)
- sz += int(reflect.TypeOf(trie{}).Size())
- return
-}