diff options
author | Niall Sheridan <nsheridan@gmail.com> | 2017-10-18 13:15:14 +0100 |
---|---|---|
committer | Niall Sheridan <niall@intercom.io> | 2017-10-18 13:25:46 +0100 |
commit | 7b320119ba532fd409ec7dade7ad02011c309599 (patch) | |
tree | a39860f35b55e6cc499f8f5bfa969138c5dd6b73 /vendor/golang.org/x/text/collate/build | |
parent | 7c99874c7a3e7a89716f3ee0cdf696532e35ae35 (diff) |
Update dependencies
Diffstat (limited to 'vendor/golang.org/x/text/collate/build')
-rw-r--r-- | vendor/golang.org/x/text/collate/build/builder.go | 702 | ||||
-rw-r--r-- | vendor/golang.org/x/text/collate/build/colelem.go | 294 | ||||
-rw-r--r-- | vendor/golang.org/x/text/collate/build/contract.go | 309 | ||||
-rw-r--r-- | vendor/golang.org/x/text/collate/build/order.go | 393 | ||||
-rw-r--r-- | vendor/golang.org/x/text/collate/build/table.go | 81 | ||||
-rw-r--r-- | vendor/golang.org/x/text/collate/build/trie.go | 290 |
6 files changed, 2069 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/collate/build/builder.go b/vendor/golang.org/x/text/collate/build/builder.go new file mode 100644 index 0000000..1104284 --- /dev/null +++ b/vendor/golang.org/x/text/collate/build/builder.go @@ -0,0 +1,702 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package build // import "golang.org/x/text/collate/build" + +import ( + "fmt" + "io" + "log" + "sort" + "strings" + "unicode/utf8" + + "golang.org/x/text/internal/colltab" + "golang.org/x/text/language" + "golang.org/x/text/unicode/norm" +) + +// TODO: optimizations: +// - expandElem is currently 20K. By putting unique colElems in a separate +// table and having a byte array of indexes into this table, we can reduce +// the total size to about 7K. By also factoring out the length bytes, we +// can reduce this to about 6K. +// - trie valueBlocks are currently 100K. There are a lot of sparse blocks +// and many consecutive values with the same stride. This can be further +// compacted. +// - Compress secondary weights into 8 bits. +// - Some LDML specs specify a context element. Currently we simply concatenate +// those. Context can be implemented using the contraction trie. If Builder +// could analyze and detect when using a context makes sense, there is no +// need to expose this construct in the API. + +// A Builder builds a root collation table. The user must specify the +// collation elements for each entry. A common use will be to base the weights +// on those specified in the allkeys* file as provided by the UCA or CLDR. +type Builder struct { + index *trieBuilder + root ordering + locale []*Tailoring + t *table + err error + built bool + + minNonVar int // lowest primary recorded for a variable + varTop int // highest primary recorded for a non-variable + + // indexes used for reusing expansions and contractions + expIndex map[string]int // positions of expansions keyed by their string representation + ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes + ctElem map[string]int // contraction elements keyed by their string representation +} + +// A Tailoring builds a collation table based on another collation table. +// The table is defined by specifying tailorings to the underlying table. +// See http://unicode.org/reports/tr35/ for an overview of tailoring +// collation tables. The CLDR contains pre-defined tailorings for a variety +// of languages (See http://www.unicode.org/Public/cldr/<version>/core.zip.) +type Tailoring struct { + id string + builder *Builder + index *ordering + + anchor *entry + before bool +} + +// NewBuilder returns a new Builder. +func NewBuilder() *Builder { + return &Builder{ + index: newTrieBuilder(), + root: makeRootOrdering(), + expIndex: make(map[string]int), + ctHandle: make(map[string]ctHandle), + ctElem: make(map[string]int), + } +} + +// Tailoring returns a Tailoring for the given locale. One should +// have completed all calls to Add before calling Tailoring. +func (b *Builder) Tailoring(loc language.Tag) *Tailoring { + t := &Tailoring{ + id: loc.String(), + builder: b, + index: b.root.clone(), + } + t.index.id = t.id + b.locale = append(b.locale, t) + return t +} + +// Add adds an entry to the collation element table, mapping +// a slice of runes to a sequence of collation elements. +// A collation element is specified as list of weights: []int{primary, secondary, ...}. +// The entries are typically obtained from a collation element table +// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format. +// Note that the collation elements specified by colelems are only used +// as a guide. The actual weights generated by Builder may differ. +// The argument variables is a list of indices into colelems that should contain +// a value for each colelem that is a variable. (See the reference above.) +func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error { + str := string(runes) + elems := make([]rawCE, len(colelems)) + for i, ce := range colelems { + if len(ce) == 0 { + break + } + elems[i] = makeRawCE(ce, 0) + if len(ce) == 1 { + elems[i].w[1] = defaultSecondary + } + if len(ce) <= 2 { + elems[i].w[2] = defaultTertiary + } + if len(ce) <= 3 { + elems[i].w[3] = ce[0] + } + } + for i, ce := range elems { + p := ce.w[0] + isvar := false + for _, j := range variables { + if i == j { + isvar = true + } + } + if isvar { + if p >= b.minNonVar && b.minNonVar > 0 { + return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar) + } + if p > b.varTop { + b.varTop = p + } + } else if p > 1 { // 1 is a special primary value reserved for FFFE + if p <= b.varTop { + return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop) + } + if b.minNonVar == 0 || p < b.minNonVar { + b.minNonVar = p + } + } + } + elems, err := convertLargeWeights(elems) + if err != nil { + return err + } + cccs := []uint8{} + nfd := norm.NFD.String(str) + for i := range nfd { + cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC()) + } + if len(cccs) < len(elems) { + if len(cccs) > 2 { + return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems)) + } + p := len(elems) - 1 + for ; p > 0 && elems[p].w[0] == 0; p-- { + elems[p].ccc = cccs[len(cccs)-1] + } + for ; p >= 0; p-- { + elems[p].ccc = cccs[0] + } + } else { + for i := range elems { + elems[i].ccc = cccs[i] + } + } + // doNorm in collate.go assumes that the following conditions hold. + if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] { + return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs) + } + b.root.newEntry(str, elems) + return nil +} + +func (t *Tailoring) setAnchor(anchor string) error { + anchor = norm.NFC.String(anchor) + a := t.index.find(anchor) + if a == nil { + a = t.index.newEntry(anchor, nil) + a.implicit = true + a.modified = true + for _, r := range []rune(anchor) { + e := t.index.find(string(r)) + e.lock = true + } + } + t.anchor = a + return nil +} + +// SetAnchor sets the point after which elements passed in subsequent calls to +// Insert will be inserted. It is equivalent to the reset directive in an LDML +// specification. See Insert for an example. +// SetAnchor supports the following logical reset positions: +// <first_tertiary_ignorable/>, <last_teriary_ignorable/>, <first_primary_ignorable/>, +// and <last_non_ignorable/>. +func (t *Tailoring) SetAnchor(anchor string) error { + if err := t.setAnchor(anchor); err != nil { + return err + } + t.before = false + return nil +} + +// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to +// Insert will insert entries before the anchor. +func (t *Tailoring) SetAnchorBefore(anchor string) error { + if err := t.setAnchor(anchor); err != nil { + return err + } + t.before = true + return nil +} + +// Insert sets the ordering of str relative to the entry set by the previous +// call to SetAnchor or Insert. The argument extend corresponds +// to the extend elements as defined in LDML. A non-empty value for extend +// will cause the collation elements corresponding to extend to be appended +// to the collation elements generated for the entry added by Insert. +// This has the same net effect as sorting str after the string anchor+extend. +// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details +// on parametric tailoring and http://unicode.org/reports/tr35/#Collation_Elements +// for full details on LDML. +// +// Examples: create a tailoring for Swedish, where "ä" is ordered after "z" +// at the primary sorting level: +// t := b.Tailoring("se") +// t.SetAnchor("z") +// t.Insert(colltab.Primary, "ä", "") +// Order "ü" after "ue" at the secondary sorting level: +// t.SetAnchor("ue") +// t.Insert(colltab.Secondary, "ü","") +// or +// t.SetAnchor("u") +// t.Insert(colltab.Secondary, "ü", "e") +// Order "q" afer "ab" at the secondary level and "Q" after "q" +// at the tertiary level: +// t.SetAnchor("ab") +// t.Insert(colltab.Secondary, "q", "") +// t.Insert(colltab.Tertiary, "Q", "") +// Order "b" before "a": +// t.SetAnchorBefore("a") +// t.Insert(colltab.Primary, "b", "") +// Order "0" after the last primary ignorable: +// t.SetAnchor("<last_primary_ignorable/>") +// t.Insert(colltab.Primary, "0", "") +func (t *Tailoring) Insert(level colltab.Level, str, extend string) error { + if t.anchor == nil { + return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str) + } + str = norm.NFC.String(str) + e := t.index.find(str) + if e == nil { + e = t.index.newEntry(str, nil) + } else if e.logical != noAnchor { + return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str) + } + if e.lock { + return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str) + } + a := t.anchor + // Find the first element after the anchor which differs at a level smaller or + // equal to the given level. Then insert at this position. + // See http://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details. + e.before = t.before + if t.before { + t.before = false + if a.prev == nil { + a.insertBefore(e) + } else { + for a = a.prev; a.level > level; a = a.prev { + } + a.insertAfter(e) + } + e.level = level + } else { + for ; a.level > level; a = a.next { + } + e.level = a.level + if a != e { + a.insertAfter(e) + a.level = level + } else { + // We don't set a to prev itself. This has the effect of the entry + // getting new collation elements that are an increment of itself. + // This is intentional. + a.prev.level = level + } + } + e.extend = norm.NFD.String(extend) + e.exclude = false + e.modified = true + e.elems = nil + t.anchor = e + return nil +} + +func (o *ordering) getWeight(e *entry) []rawCE { + if len(e.elems) == 0 && e.logical == noAnchor { + if e.implicit { + for _, r := range e.runes { + e.elems = append(e.elems, o.getWeight(o.find(string(r)))...) + } + } else if e.before { + count := [colltab.Identity + 1]int{} + a := e + for ; a.elems == nil && !a.implicit; a = a.next { + count[a.level]++ + } + e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)} + for i := colltab.Primary; i < colltab.Quaternary; i++ { + if count[i] != 0 { + e.elems[0].w[i] -= count[i] + break + } + } + if e.prev != nil { + o.verifyWeights(e.prev, e, e.prev.level) + } + } else { + prev := e.prev + e.elems = nextWeight(prev.level, o.getWeight(prev)) + o.verifyWeights(e, e.next, e.level) + } + } + return e.elems +} + +func (o *ordering) addExtension(e *entry) { + if ex := o.find(e.extend); ex != nil { + e.elems = append(e.elems, ex.elems...) + } else { + for _, r := range []rune(e.extend) { + e.elems = append(e.elems, o.find(string(r)).elems...) + } + } + e.extend = "" +} + +func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error { + if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil { + return nil + } + for i := colltab.Primary; i < level; i++ { + if a.elems[0].w[i] < b.elems[0].w[i] { + return nil + } + } + if a.elems[0].w[level] >= b.elems[0].w[level] { + err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems) + log.Println(err) + // TODO: return the error instead, or better, fix the conflicting entry by making room. + } + return nil +} + +func (b *Builder) error(e error) { + if e != nil { + b.err = e + } +} + +func (b *Builder) errorID(locale string, e error) { + if e != nil { + b.err = fmt.Errorf("%s:%v", locale, e) + } +} + +// patchNorm ensures that NFC and NFD counterparts are consistent. +func (o *ordering) patchNorm() { + // Insert the NFD counterparts, if necessary. + for _, e := range o.ordered { + nfd := norm.NFD.String(e.str) + if nfd != e.str { + if e0 := o.find(nfd); e0 != nil && !e0.modified { + e0.elems = e.elems + } else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) { + e := o.newEntry(nfd, e.elems) + e.modified = true + } + } + } + // Update unchanged composed forms if one of their parts changed. + for _, e := range o.ordered { + nfd := norm.NFD.String(e.str) + if e.modified || nfd == e.str { + continue + } + if e0 := o.find(nfd); e0 != nil { + e.elems = e0.elems + } else { + e.elems = o.genColElems(nfd) + if norm.NFD.LastBoundary([]byte(nfd)) == 0 { + r := []rune(nfd) + head := string(r[0]) + tail := "" + for i := 1; i < len(r); i++ { + s := norm.NFC.String(head + string(r[i])) + if e0 := o.find(s); e0 != nil && e0.modified { + head = s + } else { + tail += string(r[i]) + } + } + e.elems = append(o.genColElems(head), o.genColElems(tail)...) + } + } + } + // Exclude entries for which the individual runes generate the same collation elements. + for _, e := range o.ordered { + if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) { + e.exclude = true + } + } +} + +func (b *Builder) buildOrdering(o *ordering) { + for _, e := range o.ordered { + o.getWeight(e) + } + for _, e := range o.ordered { + o.addExtension(e) + } + o.patchNorm() + o.sort() + simplify(o) + b.processExpansions(o) // requires simplify + b.processContractions(o) // requires simplify + + t := newNode() + for e := o.front(); e != nil; e, _ = e.nextIndexed() { + if !e.skip() { + ce, err := e.encode() + b.errorID(o.id, err) + t.insert(e.runes[0], ce) + } + } + o.handle = b.index.addTrie(t) +} + +func (b *Builder) build() (*table, error) { + if b.built { + return b.t, b.err + } + b.built = true + b.t = &table{ + Table: colltab.Table{ + MaxContractLen: utf8.UTFMax, + VariableTop: uint32(b.varTop), + }, + } + + b.buildOrdering(&b.root) + b.t.root = b.root.handle + for _, t := range b.locale { + b.buildOrdering(t.index) + if b.err != nil { + break + } + } + i, err := b.index.generate() + b.t.trie = *i + b.t.Index = colltab.Trie{ + Index: i.index, + Values: i.values, + Index0: i.index[blockSize*b.t.root.lookupStart:], + Values0: i.values[blockSize*b.t.root.valueStart:], + } + b.error(err) + return b.t, b.err +} + +// Build builds the root Collator. +func (b *Builder) Build() (colltab.Weighter, error) { + table, err := b.build() + if err != nil { + return nil, err + } + return table, nil +} + +// Build builds a Collator for Tailoring t. +func (t *Tailoring) Build() (colltab.Weighter, error) { + // TODO: implement. + return nil, nil +} + +// Print prints the tables for b and all its Tailorings as a Go file +// that can be included in the Collate package. +func (b *Builder) Print(w io.Writer) (n int, err error) { + p := func(nn int, e error) { + n += nn + if err == nil { + err = e + } + } + t, err := b.build() + if err != nil { + return 0, err + } + p(fmt.Fprintf(w, `var availableLocales = "und`)) + for _, loc := range b.locale { + if loc.id != "und" { + p(fmt.Fprintf(w, ",%s", loc.id)) + } + } + p(fmt.Fprint(w, "\"\n\n")) + p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop)) + p(fmt.Fprintln(w, "var locales = [...]tableIndex{")) + for _, loc := range b.locale { + if loc.id == "und" { + p(t.fprintIndex(w, loc.index.handle, loc.id)) + } + } + for _, loc := range b.locale { + if loc.id != "und" { + p(t.fprintIndex(w, loc.index.handle, loc.id)) + } + } + p(fmt.Fprint(w, "}\n\n")) + n, _, err = t.fprint(w, "main") + return +} + +// reproducibleFromNFKD checks whether the given expansion could be generated +// from an NFKD expansion. +func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool { + // Length must be equal. + if len(exp) != len(nfkd) { + return false + } + for i, ce := range exp { + // Primary and secondary values should be equal. + if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] { + return false + } + // Tertiary values should be equal to maxTertiary for third element onwards. + // TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can + // simply be dropped. Try this out by dropping the following code. + if i >= 2 && ce.w[2] != maxTertiary { + return false + } + if _, err := makeCE(ce); err != nil { + // Simply return false. The error will be caught elsewhere. + return false + } + } + return true +} + +func simplify(o *ordering) { + // Runes that are a starter of a contraction should not be removed. + // (To date, there is only Kannada character 0CCA.) + keep := make(map[rune]bool) + for e := o.front(); e != nil; e, _ = e.nextIndexed() { + if len(e.runes) > 1 { + keep[e.runes[0]] = true + } + } + // Tag entries for which the runes NFKD decompose to identical values. + for e := o.front(); e != nil; e, _ = e.nextIndexed() { + s := e.str + nfkd := norm.NFKD.String(s) + nfd := norm.NFD.String(s) + if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd { + continue + } + if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) { + e.decompose = true + } + } +} + +// appendExpansion converts the given collation sequence to +// collation elements and adds them to the expansion table. +// It returns an index to the expansion table. +func (b *Builder) appendExpansion(e *entry) int { + t := b.t + i := len(t.ExpandElem) + ce := uint32(len(e.elems)) + t.ExpandElem = append(t.ExpandElem, ce) + for _, w := range e.elems { + ce, err := makeCE(w) + if err != nil { + b.error(err) + return -1 + } + t.ExpandElem = append(t.ExpandElem, ce) + } + return i +} + +// processExpansions extracts data necessary to generate +// the extraction tables. +func (b *Builder) processExpansions(o *ordering) { + for e := o.front(); e != nil; e, _ = e.nextIndexed() { + if !e.expansion() { + continue + } + key := fmt.Sprintf("%v", e.elems) + i, ok := b.expIndex[key] + if !ok { + i = b.appendExpansion(e) + b.expIndex[key] = i + } + e.expansionIndex = i + } +} + +func (b *Builder) processContractions(o *ordering) { + // Collate contractions per starter rune. + starters := []rune{} + cm := make(map[rune][]*entry) + for e := o.front(); e != nil; e, _ = e.nextIndexed() { + if e.contraction() { + if len(e.str) > b.t.MaxContractLen { + b.t.MaxContractLen = len(e.str) + } + r := e.runes[0] + if _, ok := cm[r]; !ok { + starters = append(starters, r) + } + cm[r] = append(cm[r], e) + } + } + // Add entries of single runes that are at a start of a contraction. + for e := o.front(); e != nil; e, _ = e.nextIndexed() { + if !e.contraction() { + r := e.runes[0] + if _, ok := cm[r]; ok { + cm[r] = append(cm[r], e) + } + } + } + // Build the tries for the contractions. + t := b.t + for _, r := range starters { + l := cm[r] + // Compute suffix strings. There are 31 different contraction suffix + // sets for 715 contractions and 82 contraction starter runes as of + // version 6.0.0. + sufx := []string{} + hasSingle := false + for _, e := range l { + if len(e.runes) > 1 { + sufx = append(sufx, string(e.runes[1:])) + } else { + hasSingle = true + } + } + if !hasSingle { + b.error(fmt.Errorf("no single entry for starter rune %U found", r)) + continue + } + // Unique the suffix set. + sort.Strings(sufx) + key := strings.Join(sufx, "\n") + handle, ok := b.ctHandle[key] + if !ok { + var err error + handle, err = appendTrie(&t.ContractTries, sufx) + if err != nil { + b.error(err) + } + b.ctHandle[key] = handle + } + // Bucket sort entries in index order. + es := make([]*entry, len(l)) + for _, e := range l { + var p, sn int + if len(e.runes) > 1 { + str := []byte(string(e.runes[1:])) + p, sn = lookup(&t.ContractTries, handle, str) + if sn != len(str) { + log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str)) + } + } + if es[p] != nil { + log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0]) + } + es[p] = e + } + // Create collation elements for contractions. + elems := []uint32{} + for _, e := range es { + ce, err := e.encodeBase() + b.errorID(o.id, err) + elems = append(elems, ce) + } + key = fmt.Sprintf("%v", elems) + i, ok := b.ctElem[key] + if !ok { + i = len(t.ContractElem) + b.ctElem[key] = i + t.ContractElem = append(t.ContractElem, elems...) + } + // Store info in entry for starter rune. + es[0].contractionIndex = i + es[0].contractionHandle = handle + } +} diff --git a/vendor/golang.org/x/text/collate/build/colelem.go b/vendor/golang.org/x/text/collate/build/colelem.go new file mode 100644 index 0000000..726fe54 --- /dev/null +++ b/vendor/golang.org/x/text/collate/build/colelem.go @@ -0,0 +1,294 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package build + +import ( + "fmt" + "unicode" + + "golang.org/x/text/internal/colltab" +) + +const ( + defaultSecondary = 0x20 + defaultTertiary = 0x2 + maxTertiary = 0x1F +) + +type rawCE struct { + w []int + ccc uint8 +} + +func makeRawCE(w []int, ccc uint8) rawCE { + ce := rawCE{w: make([]int, 4), ccc: ccc} + copy(ce.w, w) + return ce +} + +// A collation element is represented as an uint32. +// In the typical case, a rune maps to a single collation element. If a rune +// can be the start of a contraction or expands into multiple collation elements, +// then the collation element that is associated with a rune will have a special +// form to represent such m to n mappings. Such special collation elements +// have a value >= 0x80000000. + +const ( + maxPrimaryBits = 21 + maxSecondaryBits = 12 + maxTertiaryBits = 8 +) + +func makeCE(ce rawCE) (uint32, error) { + v, e := colltab.MakeElem(ce.w[0], ce.w[1], ce.w[2], ce.ccc) + return uint32(v), e +} + +// For contractions, collation elements are of the form +// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where +// - n* is the size of the first node in the contraction trie. +// - i* is the index of the first node in the contraction trie. +// - b* is the offset into the contraction collation element table. +// See contract.go for details on the contraction trie. +const ( + contractID = 0xC0000000 + maxNBits = 4 + maxTrieIndexBits = 12 + maxContractOffsetBits = 13 +) + +func makeContractIndex(h ctHandle, offset int) (uint32, error) { + if h.n >= 1<<maxNBits { + return 0, fmt.Errorf("size of contraction trie node too large: %d >= %d", h.n, 1<<maxNBits) + } + if h.index >= 1<<maxTrieIndexBits { + return 0, fmt.Errorf("size of contraction trie offset too large: %d >= %d", h.index, 1<<maxTrieIndexBits) + } + if offset >= 1<<maxContractOffsetBits { + return 0, fmt.Errorf("contraction offset out of bounds: %x >= %x", offset, 1<<maxContractOffsetBits) + } + ce := uint32(contractID) + ce += uint32(offset << (maxNBits + maxTrieIndexBits)) + ce += uint32(h.index << maxNBits) + ce += uint32(h.n) + return ce, nil +} + +// For expansions, collation elements are of the form +// 11100000 00000000 bbbbbbbb bbbbbbbb, +// where b* is the index into the expansion sequence table. +const ( + expandID = 0xE0000000 + maxExpandIndexBits = 16 +) + +func makeExpandIndex(index int) (uint32, error) { + if index >= 1<<maxExpandIndexBits { + return 0, fmt.Errorf("expansion index out of bounds: %x >= %x", index, 1<<maxExpandIndexBits) + } + return expandID + uint32(index), nil +} + +// Each list of collation elements corresponding to an expansion starts with +// a header indicating the length of the sequence. +func makeExpansionHeader(n int) (uint32, error) { + return uint32(n), nil +} + +// Some runes can be expanded using NFKD decomposition. Instead of storing the full +// sequence of collation elements, we decompose the rune and lookup the collation +// elements for each rune in the decomposition and modify the tertiary weights. +// The collation element, in this case, is of the form +// 11110000 00000000 wwwwwwww vvvvvvvv, where +// - v* is the replacement tertiary weight for the first rune, +// - w* is the replacement tertiary weight for the second rune, +// Tertiary weights of subsequent runes should be replaced with maxTertiary. +// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. +const ( + decompID = 0xF0000000 +) + +func makeDecompose(t1, t2 int) (uint32, error) { + if t1 >= 256 || t1 < 0 { + return 0, fmt.Errorf("first tertiary weight out of bounds: %d >= 256", t1) + } + if t2 >= 256 || t2 < 0 { + return 0, fmt.Errorf("second tertiary weight out of bounds: %d >= 256", t2) + } + return uint32(t2<<8+t1) + decompID, nil +} + +const ( + // These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf. + minUnified rune = 0x4E00 + maxUnified = 0x9FFF + minCompatibility = 0xF900 + maxCompatibility = 0xFAFF + minRare = 0x3400 + maxRare = 0x4DBF +) +const ( + commonUnifiedOffset = 0x10000 + rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF + otherOffset = 0x50000 // largest rune in rare is U+2FA1D + illegalOffset = otherOffset + int(unicode.MaxRune) + maxPrimary = illegalOffset + 1 +) + +// implicitPrimary returns the primary weight for the a rune +// for which there is no entry for the rune in the collation table. +// We take a different approach from the one specified in +// http://unicode.org/reports/tr10/#Implicit_Weights, +// but preserve the resulting relative ordering of the runes. +func implicitPrimary(r rune) int { + if unicode.Is(unicode.Ideographic, r) { + if r >= minUnified && r <= maxUnified { + // The most common case for CJK. + return int(r) + commonUnifiedOffset + } + if r >= minCompatibility && r <= maxCompatibility { + // This will typically not hit. The DUCET explicitly specifies mappings + // for all characters that do not decompose. + return int(r) + commonUnifiedOffset + } + return int(r) + rareUnifiedOffset + } + return int(r) + otherOffset +} + +// convertLargeWeights converts collation elements with large +// primaries (either double primaries or for illegal runes) +// to our own representation. +// A CJK character C is represented in the DUCET as +// [.FBxx.0020.0002.C][.BBBB.0000.0000.C] +// We will rewrite these characters to a single CE. +// We assume the CJK values start at 0x8000. +// See http://unicode.org/reports/tr10/#Implicit_Weights +func convertLargeWeights(elems []rawCE) (res []rawCE, err error) { + const ( + cjkPrimaryStart = 0xFB40 + rarePrimaryStart = 0xFB80 + otherPrimaryStart = 0xFBC0 + illegalPrimary = 0xFFFE + highBitsMask = 0x3F + lowBitsMask = 0x7FFF + lowBitsFlag = 0x8000 + shiftBits = 15 + ) + for i := 0; i < len(elems); i++ { + ce := elems[i].w + p := ce[0] + if p < cjkPrimaryStart { + continue + } + if p > 0xFFFF { + return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p) + } + if p >= illegalPrimary { + ce[0] = illegalOffset + p - illegalPrimary + } else { + if i+1 >= len(elems) { + return elems, fmt.Errorf("second part of double primary weight missing: %v", elems) + } + if elems[i+1].w[0]&lowBitsFlag == 0 { + return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems) + } + np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask + switch { + case p < rarePrimaryStart: + np += commonUnifiedOffset + case p < otherPrimaryStart: + np += rareUnifiedOffset + default: + p += otherOffset + } + ce[0] = np + for j := i + 1; j+1 < len(elems); j++ { + elems[j] = elems[j+1] + } + elems = elems[:len(elems)-1] + } + } + return elems, nil +} + +// nextWeight computes the first possible collation weights following elems +// for the given level. +func nextWeight(level colltab.Level, elems []rawCE) []rawCE { + if level == colltab.Identity { + next := make([]rawCE, len(elems)) + copy(next, elems) + return next + } + next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)} + next[0].w[level]++ + if level < colltab.Secondary { + next[0].w[colltab.Secondary] = defaultSecondary + } + if level < colltab.Tertiary { + next[0].w[colltab.Tertiary] = defaultTertiary + } + // Filter entries that cannot influence ordering. + for _, ce := range elems[1:] { + skip := true + for i := colltab.Primary; i < level; i++ { + skip = skip && ce.w[i] == 0 + } + if !skip { + next = append(next, ce) + } + } + return next +} + +func nextVal(elems []rawCE, i int, level colltab.Level) (index, value int) { + for ; i < len(elems) && elems[i].w[level] == 0; i++ { + } + if i < len(elems) { + return i, elems[i].w[level] + } + return i, 0 +} + +// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise. +// It also returns the collation level at which the difference is found. +func compareWeights(a, b []rawCE) (result int, level colltab.Level) { + for level := colltab.Primary; level < colltab.Identity; level++ { + var va, vb int + for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 { + ia, va = nextVal(a, ia, level) + ib, vb = nextVal(b, ib, level) + if va != vb { + if va < vb { + return -1, level + } else { + return 1, level + } + } + } + } + return 0, colltab.Identity +} + +func equalCE(a, b rawCE) bool { + for i := 0; i < 3; i++ { + if b.w[i] != a.w[i] { + return false + } + } + return true +} + +func equalCEArrays(a, b []rawCE) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if !equalCE(a[i], b[i]) { + return false + } + } + return true +} diff --git a/vendor/golang.org/x/text/collate/build/contract.go b/vendor/golang.org/x/text/collate/build/contract.go new file mode 100644 index 0000000..a6a7e01 --- /dev/null +++ b/vendor/golang.org/x/text/collate/build/contract.go @@ -0,0 +1,309 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package build + +import ( + "fmt" + "io" + "reflect" + "sort" + "strings" + + "golang.org/x/text/internal/colltab" +) + +// This file contains code for detecting contractions and generating +// the necessary tables. +// Any Unicode Collation Algorithm (UCA) table entry that has more than +// one rune one the left-hand side is called a contraction. +// See http://www.unicode.org/reports/tr10/#Contractions for more details. +// +// We define the following terms: +// initial: a rune that appears as the first rune in a contraction. +// suffix: a sequence of runes succeeding the initial rune +// in a given contraction. +// non-initial: a rune that appears in a suffix. +// +// A rune may be both an initial and a non-initial and may be so in +// many contractions. An initial may typically also appear by itself. +// In case of ambiguities, the UCA requires we match the longest +// contraction. +// +// Many contraction rules share the same set of possible suffixes. +// We store sets of suffixes in a trie that associates an index with +// each suffix in the set. This index can be used to look up a +// collation element associated with the (starter rune, suffix) pair. +// +// The trie is defined on a UTF-8 byte sequence. +// The overall trie is represented as an array of ctEntries. Each node of the trie +// is represented as a subsequence of ctEntries, where each entry corresponds to +// a possible match of a next character in the search string. An entry +// also includes the length and offset to the next sequence of entries +// to check in case of a match. + +const ( + final = 0 + noIndex = 0xFF +) + +// ctEntry associates to a matching byte an offset and/or next sequence of +// bytes to check. A ctEntry c is called final if a match means that the +// longest suffix has been found. An entry c is final if c.N == 0. +// A single final entry can match a range of characters to an offset. +// A non-final entry always matches a single byte. Note that a non-final +// entry might still resemble a completed suffix. +// Examples: +// The suffix strings "ab" and "ac" can be represented as: +// []ctEntry{ +// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF. +// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2 +// } +// +// The suffix strings "ab", "abc", "abd", and "abcd" can be represented as: +// []ctEntry{ +// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'. +// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'. +// {'d', 'd', final, 3}, // "abd" -> 3 +// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'. +// {'d', 'd', final, 4}, // "abcd" -> 4 +// } +// See genStateTests in contract_test.go for more examples. +type ctEntry struct { + L uint8 // non-final: byte value to match; final: lowest match in range. + H uint8 // non-final: relative index to next block; final: highest match in range. + N uint8 // non-final: length of next block; final: final + I uint8 // result offset. Will be noIndex if more bytes are needed to complete. +} + +// contractTrieSet holds a set of contraction tries. The tries are stored +// consecutively in the entry field. +type contractTrieSet []struct{ l, h, n, i uint8 } + +// ctHandle is used to identify a trie in the trie set, consisting in an offset +// in the array and the size of the first node. +type ctHandle struct { + index, n int +} + +// appendTrie adds a new trie for the given suffixes to the trie set and returns +// a handle to it. The handle will be invalid on error. +func appendTrie(ct *colltab.ContractTrieSet, suffixes []string) (ctHandle, error) { + es := make([]stridx, len(suffixes)) + for i, s := range suffixes { + es[i].str = s + } + sort.Sort(offsetSort(es)) + for i := range es { + es[i].index = i + 1 + } + sort.Sort(genidxSort(es)) + i := len(*ct) + n, err := genStates(ct, es) + if err != nil { + *ct = (*ct)[:i] + return ctHandle{}, err + } + return ctHandle{i, n}, nil +} + +// genStates generates ctEntries for a given suffix set and returns +// the number of entries for the first node. +func genStates(ct *colltab.ContractTrieSet, sis []stridx) (int, error) { + if len(sis) == 0 { + return 0, fmt.Errorf("genStates: list of suffices must be non-empty") + } + start := len(*ct) + // create entries for differing first bytes. + for _, si := range sis { + s := si.str + if len(s) == 0 { + continue + } + added := false + c := s[0] + if len(s) > 1 { + for j := len(*ct) - 1; j >= start; j-- { + if (*ct)[j].L == c { + added = true + break + } + } + if !added { + *ct = append(*ct, ctEntry{L: c, I: noIndex}) + } + } else { + for j := len(*ct) - 1; j >= start; j-- { + // Update the offset for longer suffixes with the same byte. + if (*ct)[j].L == c { + (*ct)[j].I = uint8(si.index) + added = true + } + // Extend range of final ctEntry, if possible. + if (*ct)[j].H+1 == c { + (*ct)[j].H = c + added = true + } + } + if !added { + *ct = append(*ct, ctEntry{L: c, H: c, N: final, I: uint8(si.index)}) + } + } + } + n := len(*ct) - start + // Append nodes for the remainder of the suffixes for each ctEntry. + sp := 0 + for i, end := start, len(*ct); i < end; i++ { + fe := (*ct)[i] + if fe.H == 0 { // uninitialized non-final + ln := len(*ct) - start - n + if ln > 0xFF { + return 0, fmt.Errorf("genStates: relative block offset too large: %d > 255", ln) + } + fe.H = uint8(ln) + // Find first non-final strings with same byte as current entry. + for ; sis[sp].str[0] != fe.L; sp++ { + } + se := sp + 1 + for ; se < len(sis) && len(sis[se].str) > 1 && sis[se].str[0] == fe.L; se++ { + } + sl := sis[sp:se] + sp = se + for i, si := range sl { + sl[i].str = si.str[1:] + } + nn, err := genStates(ct, sl) + if err != nil { + return 0, err + } + fe.N = uint8(nn) + (*ct)[i] = fe + } + } + sort.Sort(entrySort((*ct)[start : start+n])) + return n, nil +} + +// There may be both a final and non-final entry for a byte if the byte +// is implied in a range of matches in the final entry. +// We need to ensure that the non-final entry comes first in that case. +type entrySort colltab.ContractTrieSet + +func (fe entrySort) Len() int { return len(fe) } +func (fe entrySort) Swap(i, j int) { fe[i], fe[j] = fe[j], fe[i] } +func (fe entrySort) Less(i, j int) bool { + return fe[i].L > fe[j].L +} + +// stridx is used for sorting suffixes and their associated offsets. +type stridx struct { + str string + index int +} + +// For computing the offsets, we first sort by size, and then by string. +// This ensures that strings that only differ in the last byte by 1 +// are sorted consecutively in increasing order such that they can +// be packed as a range in a final ctEntry. +type offsetSort []stridx + +func (si offsetSort) Len() int { return len(si) } +func (si offsetSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] } +func (si offsetSort) Less(i, j int) bool { + if len(si[i].str) != len(si[j].str) { + return len(si[i].str) > len(si[j].str) + } + return si[i].str < si[j].str +} + +// For indexing, we want to ensure that strings are sorted in string order, where +// for strings with the same prefix, we put longer strings before shorter ones. +type genidxSort []stridx + +func (si genidxSort) Len() int { return len(si) } +func (si genidxSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] } +func (si genidxSort) Less(i, j int) bool { + if strings.HasPrefix(si[j].str, si[i].str) { + return false + } + if strings.HasPrefix(si[i].str, si[j].str) { + return true + } + return si[i].str < si[j].str +} + +// lookup matches the longest suffix in str and returns the associated offset +// and the number of bytes consumed. +func lookup(ct *colltab.ContractTrieSet, h ctHandle, str []byte) (index, ns int) { + states := (*ct)[h.index:] + p := 0 + n := h.n + for i := 0; i < n && p < len(str); { + e := states[i] + c := str[p] + if c >= e.L { + if e.L == c { + p++ + if e.I != noIndex { + index, ns = int(e.I), p + } + if e.N != final { + // set to new state + i, states, n = 0, states[int(e.H)+n:], int(e.N) + } else { + return + } + continue + } else if e.N == final && c <= e.H { + p++ + return int(c-e.L) + int(e.I), p + } + } + i++ + } + return +} + +// print writes the contractTrieSet t as compilable Go code to w. It returns +// the total number of bytes written and the size of the resulting data structure in bytes. +func print(t *colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) { + update3 := func(nn, sz int, e error) { + n += nn + if err == nil { + err = e + } + size += sz + } + update2 := func(nn int, e error) { update3(nn, 0, e) } + + update3(printArray(*t, w, name)) + update2(fmt.Fprintf(w, "var %sContractTrieSet = ", name)) + update3(printStruct(*t, w, name)) + update2(fmt.Fprintln(w)) + return +} + +func printArray(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) { + p := func(f string, a ...interface{}) { + nn, e := fmt.Fprintf(w, f, a...) + n += nn + if err == nil { + err = e + } + } + size = len(ct) * 4 + p("// %sCTEntries: %d entries, %d bytes\n", name, len(ct), size) + p("var %sCTEntries = [%d]struct{L,H,N,I uint8}{\n", name, len(ct)) + for _, fe := range ct { + p("\t{0x%X, 0x%X, %d, %d},\n", fe.L, fe.H, fe.N, fe.I) + } + p("}\n") + return +} + +func printStruct(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) { + n, err = fmt.Fprintf(w, "colltab.ContractTrieSet( %sCTEntries[:] )", name) + size = int(reflect.TypeOf(ct).Size()) + return +} diff --git a/vendor/golang.org/x/text/collate/build/order.go b/vendor/golang.org/x/text/collate/build/order.go new file mode 100644 index 0000000..2c568db --- /dev/null +++ b/vendor/golang.org/x/text/collate/build/order.go @@ -0,0 +1,393 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package build + +import ( + "fmt" + "log" + "sort" + "strings" + "unicode" + + "golang.org/x/text/internal/colltab" + "golang.org/x/text/unicode/norm" +) + +type logicalAnchor int + +const ( + firstAnchor logicalAnchor = -1 + noAnchor = 0 + lastAnchor = 1 +) + +// entry is used to keep track of a single entry in the collation element table +// during building. Examples of entries can be found in the Default Unicode +// Collation Element Table. +// See http://www.unicode.org/Public/UCA/6.0.0/allkeys.txt. +type entry struct { + str string // same as string(runes) + runes []rune + elems []rawCE // the collation elements + extend string // weights of extend to be appended to elems + before bool // weights relative to next instead of previous. + lock bool // entry is used in extension and can no longer be moved. + + // prev, next, and level are used to keep track of tailorings. + prev, next *entry + level colltab.Level // next differs at this level + skipRemove bool // do not unlink when removed + + decompose bool // can use NFKD decomposition to generate elems + exclude bool // do not include in table + implicit bool // derived, is not included in the list + modified bool // entry was modified in tailoring + logical logicalAnchor + + expansionIndex int // used to store index into expansion table + contractionHandle ctHandle + contractionIndex int // index into contraction elements +} + +func (e *entry) String() string { + return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)", + e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex) +} + +func (e *entry) skip() bool { + return e.contraction() +} + +func (e *entry) expansion() bool { + return !e.decompose && len(e.elems) > 1 +} + +func (e *entry) contraction() bool { + return len(e.runes) > 1 +} + +func (e *entry) contractionStarter() bool { + return e.contractionHandle.n != 0 +} + +// nextIndexed gets the next entry that needs to be stored in the table. +// It returns the entry and the collation level at which the next entry differs +// from the current entry. +// Entries that can be explicitly derived and logical reset positions are +// examples of entries that will not be indexed. +func (e *entry) nextIndexed() (*entry, colltab.Level) { + level := e.level + for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next { + if e.level < level { + level = e.level + } + } + return e, level +} + +// remove unlinks entry e from the sorted chain and clears the collation +// elements. e may not be at the front or end of the list. This should always +// be the case, as the front and end of the list are always logical anchors, +// which may not be removed. +func (e *entry) remove() { + if e.logical != noAnchor { + log.Fatalf("may not remove anchor %q", e.str) + } + // TODO: need to set e.prev.level to e.level if e.level is smaller? + e.elems = nil + if !e.skipRemove { + if e.prev != nil { + e.prev.next = e.next + } + if e.next != nil { + e.next.prev = e.prev + } + } + e.skipRemove = false +} + +// insertAfter inserts n after e. +func (e *entry) insertAfter(n *entry) { + if e == n { + panic("e == anchor") + } + if e == nil { + panic("unexpected nil anchor") + } + n.remove() + n.decompose = false // redo decomposition test + + n.next = e.next + n.prev = e + if e.next != nil { + e.next.prev = n + } + e.next = n +} + +// insertBefore inserts n before e. +func (e *entry) insertBefore(n *entry) { + if e == n { + panic("e == anchor") + } + if e == nil { + panic("unexpected nil anchor") + } + n.remove() + n.decompose = false // redo decomposition test + + n.prev = e.prev + n.next = e + if e.prev != nil { + e.prev.next = n + } + e.prev = n +} + +func (e *entry) encodeBase() (ce uint32, err error) { + switch { + case e.expansion(): + ce, err = makeExpandIndex(e.expansionIndex) + default: + if e.decompose { + log.Fatal("decompose should be handled elsewhere") + } + ce, err = makeCE(e.elems[0]) + } + return +} + +func (e *entry) encode() (ce uint32, err error) { + if e.skip() { + log.Fatal("cannot build colElem for entry that should be skipped") + } + switch { + case e.decompose: + t1 := e.elems[0].w[2] + t2 := 0 + if len(e.elems) > 1 { + t2 = e.elems[1].w[2] + } + ce, err = makeDecompose(t1, t2) + case e.contractionStarter(): + ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex) + default: + if len(e.runes) > 1 { + log.Fatal("colElem: contractions are handled in contraction trie") + } + ce, err = e.encodeBase() + } + return +} + +// entryLess returns true if a sorts before b and false otherwise. +func entryLess(a, b *entry) bool { + if res, _ := compareWeights(a.elems, b.elems); res != 0 { + return res == -1 + } + if a.logical != noAnchor { + return a.logical == firstAnchor + } + if b.logical != noAnchor { + return b.logical == lastAnchor + } + return a.str < b.str +} + +type sortedEntries []*entry + +func (s sortedEntries) Len() int { + return len(s) +} + +func (s sortedEntries) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +func (s sortedEntries) Less(i, j int) bool { + return entryLess(s[i], s[j]) +} + +type ordering struct { + id string + entryMap map[string]*entry + ordered []*entry + handle *trieHandle +} + +// insert inserts e into both entryMap and ordered. +// Note that insert simply appends e to ordered. To reattain a sorted +// order, o.sort() should be called. +func (o *ordering) insert(e *entry) { + if e.logical == noAnchor { + o.entryMap[e.str] = e + } else { + // Use key format as used in UCA rules. + o.entryMap[fmt.Sprintf("[%s]", e.str)] = e + // Also add index entry for XML format. + o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e + } + o.ordered = append(o.ordered, e) +} + +// newEntry creates a new entry for the given info and inserts it into +// the index. +func (o *ordering) newEntry(s string, ces []rawCE) *entry { + e := &entry{ + runes: []rune(s), + elems: ces, + str: s, + } + o.insert(e) + return e +} + +// find looks up and returns the entry for the given string. +// It returns nil if str is not in the index and if an implicit value +// cannot be derived, that is, if str represents more than one rune. +func (o *ordering) find(str string) *entry { + e := o.entryMap[str] + if e == nil { + r := []rune(str) + if len(r) == 1 { + const ( + firstHangul = 0xAC00 + lastHangul = 0xD7A3 + ) + if r[0] >= firstHangul && r[0] <= lastHangul { + ce := []rawCE{} + nfd := norm.NFD.String(str) + for _, r := range nfd { + ce = append(ce, o.find(string(r)).elems...) + } + e = o.newEntry(nfd, ce) + } else { + e = o.newEntry(string(r[0]), []rawCE{ + {w: []int{ + implicitPrimary(r[0]), + defaultSecondary, + defaultTertiary, + int(r[0]), + }, + }, + }) + e.modified = true + } + e.exclude = true // do not index implicits + } + } + return e +} + +// makeRootOrdering returns a newly initialized ordering value and populates +// it with a set of logical reset points that can be used as anchors. +// The anchors first_tertiary_ignorable and __END__ will always sort at +// the beginning and end, respectively. This means that prev and next are non-nil +// for any indexed entry. +func makeRootOrdering() ordering { + const max = unicode.MaxRune + o := ordering{ + entryMap: make(map[string]*entry), + } + insert := func(typ logicalAnchor, s string, ce []int) { + e := &entry{ + elems: []rawCE{{w: ce}}, + str: s, + exclude: true, + logical: typ, + } + o.insert(e) + } + insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0}) + insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max}) + insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max}) + insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max}) + insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max}) + return o +} + +// patchForInsert eleminates entries from the list with more than one collation element. +// The next and prev fields of the eliminated entries still point to appropriate +// values in the newly created list. +// It requires that sort has been called. +func (o *ordering) patchForInsert() { + for i := 0; i < len(o.ordered)-1; { + e := o.ordered[i] + lev := e.level + n := e.next + for ; n != nil && len(n.elems) > 1; n = n.next { + if n.level < lev { + lev = n.level + } + n.skipRemove = true + } + for ; o.ordered[i] != n; i++ { + o.ordered[i].level = lev + o.ordered[i].next = n + o.ordered[i+1].prev = e + } + } +} + +// clone copies all ordering of es into a new ordering value. +func (o *ordering) clone() *ordering { + o.sort() + oo := ordering{ + entryMap: make(map[string]*entry), + } + for _, e := range o.ordered { + ne := &entry{ + runes: e.runes, + elems: e.elems, + str: e.str, + decompose: e.decompose, + exclude: e.exclude, + logical: e.logical, + } + oo.insert(ne) + } + oo.sort() // link all ordering. + oo.patchForInsert() + return &oo +} + +// front returns the first entry to be indexed. +// It assumes that sort() has been called. +func (o *ordering) front() *entry { + e := o.ordered[0] + if e.prev != nil { + log.Panicf("unexpected first entry: %v", e) + } + // The first entry is always a logical position, which should not be indexed. + e, _ = e.nextIndexed() + return e +} + +// sort sorts all ordering based on their collation elements and initializes +// the prev, next, and level fields accordingly. +func (o *ordering) sort() { + sort.Sort(sortedEntries(o.ordered)) + l := o.ordered + for i := 1; i < len(l); i++ { + k := i - 1 + l[k].next = l[i] + _, l[k].level = compareWeights(l[k].elems, l[i].elems) + l[i].prev = l[k] + } +} + +// genColElems generates a collation element array from the runes in str. This +// assumes that all collation elements have already been added to the Builder. +func (o *ordering) genColElems(str string) []rawCE { + elems := []rawCE{} + for _, r := range []rune(str) { + for _, ce := range o.find(string(r)).elems { + if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 { + elems = append(elems, ce) + } + } + } + return elems +} diff --git a/vendor/golang.org/x/text/collate/build/table.go b/vendor/golang.org/x/text/collate/build/table.go new file mode 100644 index 0000000..7eea7a6 --- /dev/null +++ b/vendor/golang.org/x/text/collate/build/table.go @@ -0,0 +1,81 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package build + +import ( + "fmt" + "io" + "reflect" + + "golang.org/x/text/internal/colltab" +) + +// table is an intermediate structure that roughly resembles the table in collate. +type table struct { + colltab.Table + trie trie + root *trieHandle +} + +// print writes the table as Go compilable code to w. It prefixes the +// variable names with name. It returns the number of bytes written +// and the size of the resulting table. +func (t *table) fprint(w io.Writer, name string) (n, size int, err error) { + update := func(nn, sz int, e error) { + n += nn + if err == nil { + err = e + } + size += sz + } + // Write arrays needed for the structure. + update(printColElems(w, t.ExpandElem, name+"ExpandElem")) + update(printColElems(w, t.ContractElem, name+"ContractElem")) + update(t.trie.printArrays(w, name)) + update(printArray(t.ContractTries, w, name)) + + nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size) + update(nn, 0, e) + return +} + +func (t *table) fprintIndex(w io.Writer, h *trieHandle, id string) (n int, err error) { + p := func(f string, a ...interface{}) { + nn, e := fmt.Fprintf(w, f, a...) + n += nn + if err == nil { + err = e + } + } + p("\t{ // %s\n", id) + p("\t\tlookupOffset: 0x%x,\n", h.lookupStart) + p("\t\tvaluesOffset: 0x%x,\n", h.valueStart) + p("\t},\n") + return +} + +func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) { + p := func(f string, a ...interface{}) { + nn, e := fmt.Fprintf(w, f, a...) + n += nn + if err == nil { + err = e + } + } + sz = len(a) * int(reflect.TypeOf(uint32(0)).Size()) + p("// %s: %d entries, %d bytes\n", name, len(a), sz) + p("var %s = [%d]uint32 {", name, len(a)) + for i, c := range a { + switch { + case i%64 == 0: + p("\n\t// Block %d, offset 0x%x\n", i/64, i) + case (i%64)%6 == 0: + p("\n\t") + } + p("0x%.8X, ", c) + } + p("\n}\n\n") + return +} diff --git a/vendor/golang.org/x/text/collate/build/trie.go b/vendor/golang.org/x/text/collate/build/trie.go new file mode 100644 index 0000000..9404a34 --- /dev/null +++ b/vendor/golang.org/x/text/collate/build/trie.go @@ -0,0 +1,290 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The trie in this file is used to associate the first full character +// in a UTF-8 string to a collation element. +// All but the last byte in a UTF-8 byte sequence are +// used to look up offsets in the index table to be used for the next byte. +// The last byte is used to index into a table of collation elements. +// This file contains the code for the generation of the trie. + +package build + +import ( + "fmt" + "hash/fnv" + "io" + "reflect" +) + +const ( + blockSize = 64 + blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes. +) + +type trieHandle struct { + lookupStart uint16 // offset in table for first byte + valueStart uint16 // offset in table for first byte +} + +type trie struct { + index []uint16 + values []uint32 +} + +// trieNode is the intermediate trie structure used for generating a trie. +type trieNode struct { + index []*trieNode + value []uint32 + b byte + refValue uint16 + refIndex uint16 +} + +func newNode() *trieNode { + return &trieNode{ + index: make([]*trieNode, 64), + value: make([]uint32, 128), // root node size is 128 instead of 64 + } +} + +func (n *trieNode) isInternal() bool { + return n.value != nil +} + +func (n *trieNode) insert(r rune, value uint32) { + const maskx = 0x3F // mask out two most-significant bits + str := string(r) + if len(str) == 1 { + n.value[str[0]] = value + return + } + for i := 0; i < len(str)-1; i++ { + b := str[i] & maskx + if n.index == nil { + n.index = make([]*trieNode, blockSize) + } + nn := n.index[b] + if nn == nil { + nn = &trieNode{} + nn.b = b + n.index[b] = nn + } + n = nn + } + if n.value == nil { + n.value = make([]uint32, blockSize) + } + b := str[len(str)-1] & maskx + n.value[b] = value +} + +type trieBuilder struct { + t *trie + + roots []*trieHandle + + lookupBlocks []*trieNode + valueBlocks []*trieNode + + lookupBlockIdx map[uint32]*trieNode + valueBlockIdx map[uint32]*trieNode +} + +func newTrieBuilder() *trieBuilder { + index := &trieBuilder{} + index.lookupBlocks = make([]*trieNode, 0) + index.valueBlocks = make([]*trieNode, 0) + index.lookupBlockIdx = make(map[uint32]*trieNode) + index.valueBlockIdx = make(map[uint32]*trieNode) + // The third nil is the default null block. The other two blocks + // are used to guarantee an offset of at least 3 for each block. + index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil) + index.t = &trie{} + return index +} + +func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode { + hasher := fnv.New32() + if n.index != nil { + for i, nn := range n.index { + var vi, vv uint16 + if nn != nil { + nn = b.computeOffsets(nn) + n.index[i] = nn + vi = nn.refIndex + vv = nn.refValue + } + hasher.Write([]byte{byte(vi >> 8), byte(vi)}) + hasher.Write([]byte{byte(vv >> 8), byte(vv)}) + } + h := hasher.Sum32() + nn, ok := b.lookupBlockIdx[h] + if !ok { + n.refIndex = uint16(len(b.lookupBlocks)) - blockOffset + b.lookupBlocks = append(b.lookupBlocks, n) + b.lookupBlockIdx[h] = n + } else { + n = nn + } + } else { + for _, v := range n.value { + hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)}) + } + h := hasher.Sum32() + nn, ok := b.valueBlockIdx[h] + if !ok { + n.refValue = uint16(len(b.valueBlocks)) - blockOffset + n.refIndex = n.refValue + b.valueBlocks = append(b.valueBlocks, n) + b.valueBlockIdx[h] = n + } else { + n = nn + } + } + return n +} + +func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 { + hasher := fnv.New32() + for _, v := range n.value[:2*blockSize] { + hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)}) + } + h := hasher.Sum32() + nn, ok := b.valueBlockIdx[h] + if !ok { + n.refValue = uint16(len(b.valueBlocks)) + n.refIndex = n.refValue + b.valueBlocks = append(b.valueBlocks, n) + // Add a dummy block to accommodate the double block size. + b.valueBlocks = append(b.valueBlocks, nil) + b.valueBlockIdx[h] = n + } else { + n = nn + } + return n.refValue +} + +func genValueBlock(t *trie, n *trieNode) { + if n != nil { + for _, v := range n.value { + t.values = append(t.values, v) + } + } +} + +func genLookupBlock(t *trie, n *trieNode) { + for _, nn := range n.index { + v := uint16(0) + if nn != nil { + if n.index != nil { + v = nn.refIndex + } else { + v = nn.refValue + } + } + t.index = append(t.index, v) + } +} + +func (b *trieBuilder) addTrie(n *trieNode) *trieHandle { + h := &trieHandle{} + b.roots = append(b.roots, h) + h.valueStart = b.addStartValueBlock(n) + if len(b.roots) == 1 { + // We insert a null block after the first start value block. + // This ensures that continuation bytes UTF-8 sequences of length + // greater than 2 will automatically hit a null block if there + // was an undefined entry. + b.valueBlocks = append(b.valueBlocks, nil) + } + n = b.computeOffsets(n) + // Offset by one extra block as the first byte starts at 0xC0 instead of 0x80. + h.lookupStart = n.refIndex - 1 + return h +} + +// generate generates and returns the trie for n. +func (b *trieBuilder) generate() (t *trie, err error) { + t = b.t + if len(b.valueBlocks) >= 1<<16 { + return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16) + } + if len(b.lookupBlocks) >= 1<<16 { + return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16) + } + genValueBlock(t, b.valueBlocks[0]) + genValueBlock(t, &trieNode{value: make([]uint32, 64)}) + for i := 2; i < len(b.valueBlocks); i++ { + genValueBlock(t, b.valueBlocks[i]) + } + n := &trieNode{index: make([]*trieNode, 64)} + genLookupBlock(t, n) + genLookupBlock(t, n) + genLookupBlock(t, n) + for i := 3; i < len(b.lookupBlocks); i++ { + genLookupBlock(t, b.lookupBlocks[i]) + } + return b.t, nil +} + +func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) { + p := func(f string, a ...interface{}) { + nn, e := fmt.Fprintf(w, f, a...) + n += nn + if err == nil { + err = e + } + } + nv := len(t.values) + p("// %sValues: %d entries, %d bytes\n", name, nv, nv*4) + p("// Block 2 is the null block.\n") + p("var %sValues = [%d]uint32 {", name, nv) + var printnewline bool + for i, v := range t.values { + if i%blockSize == 0 { + p("\n\t// Block %#x, offset %#x", i/blockSize, i) + } + if i%4 == 0 { + printnewline = true + } + if v != 0 { + if printnewline { + p("\n\t") + printnewline = false + } + p("%#04x:%#08x, ", i, v) + } + } + p("\n}\n\n") + ni := len(t.index) + p("// %sLookup: %d entries, %d bytes\n", name, ni, ni*2) + p("// Block 0 is the null block.\n") + p("var %sLookup = [%d]uint16 {", name, ni) + printnewline = false + for i, v := range t.index { + if i%blockSize == 0 { + p("\n\t// Block %#x, offset %#x", i/blockSize, i) + } + if i%8 == 0 { + printnewline = true + } + if v != 0 { + if printnewline { + p("\n\t") + printnewline = false + } + p("%#03x:%#02x, ", i, v) + } + } + p("\n}\n\n") + return n, nv*4 + ni*2, err +} + +func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) { + const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}" + n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name) + sz += int(reflect.TypeOf(trie{}).Size()) + return +} |