From 7b320119ba532fd409ec7dade7ad02011c309599 Mon Sep 17 00:00:00 2001 From: Niall Sheridan Date: Wed, 18 Oct 2017 13:15:14 +0100 Subject: Update dependencies --- vendor/golang.org/x/text/collate/maketables.go | 553 +++++++++++++++++++++++++ 1 file changed, 553 insertions(+) create mode 100644 vendor/golang.org/x/text/collate/maketables.go (limited to 'vendor/golang.org/x/text/collate/maketables.go') diff --git a/vendor/golang.org/x/text/collate/maketables.go b/vendor/golang.org/x/text/collate/maketables.go new file mode 100644 index 0000000..b4c835e --- /dev/null +++ b/vendor/golang.org/x/text/collate/maketables.go @@ -0,0 +1,553 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// Collation table generator. +// Data read from the web. + +package main + +import ( + "archive/zip" + "bufio" + "bytes" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "os" + "regexp" + "sort" + "strconv" + "strings" + "unicode/utf8" + + "golang.org/x/text/collate" + "golang.org/x/text/collate/build" + "golang.org/x/text/internal/colltab" + "golang.org/x/text/internal/gen" + "golang.org/x/text/language" + "golang.org/x/text/unicode/cldr" +) + +var ( + test = flag.Bool("test", false, + "test existing tables; can be used to compare web data with package data.") + short = flag.Bool("short", false, `Use "short" alternatives, when available.`) + draft = flag.Bool("draft", false, `Use draft versions, when available.`) + tags = flag.String("tags", "", "build tags to be included after +build directive") + pkg = flag.String("package", "collate", + "the name of the package in which the generated file is to be included") + + tables = flagStringSetAllowAll("tables", "collate", "collate,chars", + "comma-spearated list of tables to generate.") + exclude = flagStringSet("exclude", "zh2", "", + "comma-separated list of languages to exclude.") + include = flagStringSet("include", "", "", + "comma-separated list of languages to include. Include trumps exclude.") + // TODO: Not included: unihan gb2312han zhuyin big5han (for size reasons) + // TODO: Not included: traditional (buggy for Bengali) + types = flagStringSetAllowAll("types", "standard,phonebook,phonetic,reformed,pinyin,stroke", "", + "comma-separated list of types that should be included.") +) + +// stringSet implements an ordered set based on a list. It implements flag.Value +// to allow a set to be specified as a comma-separated list. +type stringSet struct { + s []string + allowed *stringSet + dirty bool // needs compaction if true + all bool + allowAll bool +} + +func flagStringSet(name, def, allowed, usage string) *stringSet { + ss := &stringSet{} + if allowed != "" { + usage += fmt.Sprintf(" (allowed values: any of %s)", allowed) + ss.allowed = &stringSet{} + failOnError(ss.allowed.Set(allowed)) + } + ss.Set(def) + flag.Var(ss, name, usage) + return ss +} + +func flagStringSetAllowAll(name, def, allowed, usage string) *stringSet { + ss := &stringSet{allowAll: true} + if allowed == "" { + flag.Var(ss, name, usage+fmt.Sprintf(` Use "all" to select all.`)) + } else { + ss.allowed = &stringSet{} + failOnError(ss.allowed.Set(allowed)) + flag.Var(ss, name, usage+fmt.Sprintf(` (allowed values: "all" or any of %s)`, allowed)) + } + ss.Set(def) + return ss +} + +func (ss stringSet) Len() int { + return len(ss.s) +} + +func (ss stringSet) String() string { + return strings.Join(ss.s, ",") +} + +func (ss *stringSet) Set(s string) error { + if ss.allowAll && s == "all" { + ss.s = nil + ss.all = true + return nil + } + ss.s = ss.s[:0] + for _, s := range strings.Split(s, ",") { + if s := strings.TrimSpace(s); s != "" { + if ss.allowed != nil && !ss.allowed.contains(s) { + return fmt.Errorf("unsupported value %q; must be one of %s", s, ss.allowed) + } + ss.add(s) + } + } + ss.compact() + return nil +} + +func (ss *stringSet) add(s string) { + ss.s = append(ss.s, s) + ss.dirty = true +} + +func (ss *stringSet) values() []string { + ss.compact() + return ss.s +} + +func (ss *stringSet) contains(s string) bool { + if ss.all { + return true + } + for _, v := range ss.s { + if v == s { + return true + } + } + return false +} + +func (ss *stringSet) compact() { + if !ss.dirty { + return + } + a := ss.s + sort.Strings(a) + k := 0 + for i := 1; i < len(a); i++ { + if a[k] != a[i] { + a[k+1] = a[i] + k++ + } + } + ss.s = a[:k+1] + ss.dirty = false +} + +func skipLang(l string) bool { + if include.Len() > 0 { + return !include.contains(l) + } + return exclude.contains(l) +} + +// altInclude returns a list of alternatives (for the LDML alt attribute) +// in order of preference. An empty string in this list indicates the +// default entry. +func altInclude() []string { + l := []string{} + if *short { + l = append(l, "short") + } + l = append(l, "") + // TODO: handle draft using cldr.SetDraftLevel + if *draft { + l = append(l, "proposed") + } + return l +} + +func failOnError(e error) { + if e != nil { + log.Panic(e) + } +} + +func openArchive() *zip.Reader { + f := gen.OpenCLDRCoreZip() + buffer, err := ioutil.ReadAll(f) + f.Close() + failOnError(err) + archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) + failOnError(err) + return archive +} + +// parseUCA parses a Default Unicode Collation Element Table of the format +// specified in http://www.unicode.org/reports/tr10/#File_Format. +// It returns the variable top. +func parseUCA(builder *build.Builder) { + var r io.ReadCloser + var err error + for _, f := range openArchive().File { + if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") { + r, err = f.Open() + } + } + if r == nil { + log.Fatal("File allkeys_CLDR.txt not found in archive.") + } + failOnError(err) + defer r.Close() + scanner := bufio.NewScanner(r) + colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) + for i := 1; scanner.Scan(); i++ { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + if line[0] == '@' { + // parse properties + switch { + case strings.HasPrefix(line[1:], "version "): + a := strings.Split(line[1:], " ") + if a[1] != gen.UnicodeVersion() { + log.Fatalf("incompatible version %s; want %s", a[1], gen.UnicodeVersion()) + } + case strings.HasPrefix(line[1:], "backwards "): + log.Fatalf("%d: unsupported option backwards", i) + default: + log.Printf("%d: unknown option %s", i, line[1:]) + } + } else { + // parse entries + part := strings.Split(line, " ; ") + if len(part) != 2 { + log.Fatalf("%d: production rule without ';': %v", i, line) + } + lhs := []rune{} + for _, v := range strings.Split(part[0], " ") { + if v == "" { + continue + } + lhs = append(lhs, rune(convHex(i, v))) + } + var n int + var vars []int + rhs := [][]int{} + for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { + n += len(m[0]) + elem := []int{} + for _, h := range strings.Split(m[2], ".") { + elem = append(elem, convHex(i, h)) + } + if m[1] == "*" { + vars = append(vars, i) + } + rhs = append(rhs, elem) + } + if len(part[1]) < n+3 || part[1][n+1] != '#' { + log.Fatalf("%d: expected comment; found %s", i, part[1][n:]) + } + if *test { + testInput.add(string(lhs)) + } + failOnError(builder.Add(lhs, rhs, vars)) + } + } + if scanner.Err() != nil { + log.Fatal(scanner.Err()) + } +} + +func convHex(line int, s string) int { + r, e := strconv.ParseInt(s, 16, 32) + if e != nil { + log.Fatalf("%d: %v", line, e) + } + return int(r) +} + +var testInput = stringSet{} + +var charRe = regexp.MustCompile(`&#x([0-9A-F]*);`) +var tagRe = regexp.MustCompile(`<([a-z_]*) */>`) + +var mainLocales = []string{} + +// charsets holds a list of exemplar characters per category. +type charSets map[string][]string + +func (p charSets) fprint(w io.Writer) { + fmt.Fprintln(w, "[exN]string{") + for i, k := range []string{"", "contractions", "punctuation", "auxiliary", "currencySymbol", "index"} { + if set := p[k]; len(set) != 0 { + fmt.Fprintf(w, "\t\t%d: %q,\n", i, strings.Join(set, " ")) + } + } + fmt.Fprintln(w, "\t},") +} + +var localeChars = make(map[string]charSets) + +const exemplarHeader = ` +type exemplarType int +const ( + exCharacters exemplarType = iota + exContractions + exPunctuation + exAuxiliary + exCurrency + exIndex + exN +) +` + +func printExemplarCharacters(w io.Writer) { + fmt.Fprintln(w, exemplarHeader) + fmt.Fprintln(w, "var exemplarCharacters = map[string][exN]string{") + for _, loc := range mainLocales { + fmt.Fprintf(w, "\t%q: ", loc) + localeChars[loc].fprint(w) + } + fmt.Fprintln(w, "}") +} + +func decodeCLDR(d *cldr.Decoder) *cldr.CLDR { + r := gen.OpenCLDRCoreZip() + data, err := d.DecodeZip(r) + failOnError(err) + return data +} + +// parseMain parses XML files in the main directory of the CLDR core.zip file. +func parseMain() { + d := &cldr.Decoder{} + d.SetDirFilter("main") + d.SetSectionFilter("characters") + data := decodeCLDR(d) + for _, loc := range data.Locales() { + x := data.RawLDML(loc) + if skipLang(x.Identity.Language.Type) { + continue + } + if x.Characters != nil { + x, _ = data.LDML(loc) + loc = language.Make(loc).String() + for _, ec := range x.Characters.ExemplarCharacters { + if ec.Draft != "" { + continue + } + if _, ok := localeChars[loc]; !ok { + mainLocales = append(mainLocales, loc) + localeChars[loc] = make(charSets) + } + localeChars[loc][ec.Type] = parseCharacters(ec.Data()) + } + } + } +} + +func parseCharacters(chars string) []string { + parseSingle := func(s string) (r rune, tail string, escaped bool) { + if s[0] == '\\' { + return rune(s[1]), s[2:], true + } + r, sz := utf8.DecodeRuneInString(s) + return r, s[sz:], false + } + chars = strings.TrimSpace(chars) + if n := len(chars) - 1; chars[n] == ']' && chars[0] == '[' { + chars = chars[1:n] + } + list := []string{} + var r, last, end rune + for len(chars) > 0 { + if chars[0] == '{' { // character sequence + buf := []rune{} + for chars = chars[1:]; len(chars) > 0; { + r, chars, _ = parseSingle(chars) + if r == '}' { + break + } + if r == ' ' { + log.Fatalf("space not supported in sequence %q", chars) + } + buf = append(buf, r) + } + list = append(list, string(buf)) + last = 0 + } else { // single character + escaped := false + r, chars, escaped = parseSingle(chars) + if r != ' ' { + if r == '-' && !escaped { + if last == 0 { + log.Fatal("'-' should be preceded by a character") + } + end, chars, _ = parseSingle(chars) + for ; last <= end; last++ { + list = append(list, string(last)) + } + last = 0 + } else { + list = append(list, string(r)) + last = r + } + } + } + } + return list +} + +var fileRe = regexp.MustCompile(`.*/collation/(.*)\.xml`) + +// typeMap translates legacy type keys to their BCP47 equivalent. +var typeMap = map[string]string{ + "phonebook": "phonebk", + "traditional": "trad", +} + +// parseCollation parses XML files in the collation directory of the CLDR core.zip file. +func parseCollation(b *build.Builder) { + d := &cldr.Decoder{} + d.SetDirFilter("collation") + data := decodeCLDR(d) + for _, loc := range data.Locales() { + x, err := data.LDML(loc) + failOnError(err) + if skipLang(x.Identity.Language.Type) { + continue + } + cs := x.Collations.Collation + sl := cldr.MakeSlice(&cs) + if len(types.s) == 0 { + sl.SelectAnyOf("type", x.Collations.Default()) + } else if !types.all { + sl.SelectAnyOf("type", types.s...) + } + sl.SelectOnePerGroup("alt", altInclude()) + + for _, c := range cs { + id, err := language.Parse(loc) + if err != nil { + fmt.Fprintf(os.Stderr, "invalid locale: %q", err) + continue + } + // Support both old- and new-style defaults. + d := c.Type + if x.Collations.DefaultCollation == nil { + d = x.Collations.Default() + } else { + d = x.Collations.DefaultCollation.Data() + } + // We assume tables are being built either for search or collation, + // but not both. For search the default is always "search". + if d != c.Type && c.Type != "search" { + typ := c.Type + if len(c.Type) > 8 { + typ = typeMap[c.Type] + } + id, err = id.SetTypeForKey("co", typ) + failOnError(err) + } + t := b.Tailoring(id) + c.Process(processor{t}) + } + } +} + +type processor struct { + t *build.Tailoring +} + +func (p processor) Reset(anchor string, before int) (err error) { + if before != 0 { + err = p.t.SetAnchorBefore(anchor) + } else { + err = p.t.SetAnchor(anchor) + } + failOnError(err) + return nil +} + +func (p processor) Insert(level int, str, context, extend string) error { + str = context + str + if *test { + testInput.add(str) + } + // TODO: mimic bug in old maketables: remove. + err := p.t.Insert(colltab.Level(level-1), str, context+extend) + failOnError(err) + return nil +} + +func (p processor) Index(id string) { +} + +func testCollator(c *collate.Collator) { + c0 := collate.New(language.Und) + + // iterator over all characters for all locales and check + // whether Key is equal. + buf := collate.Buffer{} + + // Add all common and not too uncommon runes to the test set. + for i := rune(0); i < 0x30000; i++ { + testInput.add(string(i)) + } + for i := rune(0xE0000); i < 0xF0000; i++ { + testInput.add(string(i)) + } + for _, str := range testInput.values() { + k0 := c0.KeyFromString(&buf, str) + k := c.KeyFromString(&buf, str) + if !bytes.Equal(k0, k) { + failOnError(fmt.Errorf("test:%U: keys differ (%x vs %x)", []rune(str), k0, k)) + } + buf.Reset() + } + fmt.Println("PASS") +} + +func main() { + gen.Init() + b := build.NewBuilder() + parseUCA(b) + if tables.contains("chars") { + parseMain() + } + parseCollation(b) + + c, err := b.Build() + failOnError(err) + + if *test { + testCollator(collate.NewFromTable(c)) + } else { + w := &bytes.Buffer{} + + gen.WriteUnicodeVersion(w) + gen.WriteCLDRVersion(w) + + if tables.contains("collate") { + _, err = b.Print(w) + failOnError(err) + } + if tables.contains("chars") { + printExemplarCharacters(w) + } + gen.WriteGoFile("tables.go", *pkg, w.Bytes()) + } +} -- cgit v1.2.3