From 8c12c6939aab9106db14ec2d11d983bc5b29fb2c Mon Sep 17 00:00:00 2001 From: Niall Sheridan Date: Sun, 7 Jul 2019 21:33:44 +0100 Subject: Switch to modules --- vendor/golang.org/x/text/unicode/norm/normalize.go | 609 --------------------- 1 file changed, 609 deletions(-) delete mode 100644 vendor/golang.org/x/text/unicode/norm/normalize.go (limited to 'vendor/golang.org/x/text/unicode/norm/normalize.go') diff --git a/vendor/golang.org/x/text/unicode/norm/normalize.go b/vendor/golang.org/x/text/unicode/norm/normalize.go deleted file mode 100644 index 95efcf2..0000000 --- a/vendor/golang.org/x/text/unicode/norm/normalize.go +++ /dev/null @@ -1,609 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Note: the file data_test.go that is generated should not be checked in. -//go:generate go run maketables.go triegen.go -//go:generate go test -tags test - -// Package norm contains types and functions for normalizing Unicode strings. -package norm // import "golang.org/x/text/unicode/norm" - -import ( - "unicode/utf8" - - "golang.org/x/text/transform" -) - -// A Form denotes a canonical representation of Unicode code points. -// The Unicode-defined normalization and equivalence forms are: -// -// NFC Unicode Normalization Form C -// NFD Unicode Normalization Form D -// NFKC Unicode Normalization Form KC -// NFKD Unicode Normalization Form KD -// -// For a Form f, this documentation uses the notation f(x) to mean -// the bytes or string x converted to the given form. -// A position n in x is called a boundary if conversion to the form can -// proceed independently on both sides: -// f(x) == append(f(x[0:n]), f(x[n:])...) -// -// References: https://unicode.org/reports/tr15/ and -// https://unicode.org/notes/tn5/. -type Form int - -const ( - NFC Form = iota - NFD - NFKC - NFKD -) - -// Bytes returns f(b). May return b if f(b) = b. 
-func (f Form) Bytes(b []byte) []byte { - src := inputBytes(b) - ft := formTable[f] - n, ok := ft.quickSpan(src, 0, len(b), true) - if ok { - return b - } - out := make([]byte, n, len(b)) - copy(out, b[0:n]) - rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush} - return doAppendInner(&rb, n) -} - -// String returns f(s). -func (f Form) String(s string) string { - src := inputString(s) - ft := formTable[f] - n, ok := ft.quickSpan(src, 0, len(s), true) - if ok { - return s - } - out := make([]byte, n, len(s)) - copy(out, s[0:n]) - rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush} - return string(doAppendInner(&rb, n)) -} - -// IsNormal returns true if b == f(b). -func (f Form) IsNormal(b []byte) bool { - src := inputBytes(b) - ft := formTable[f] - bp, ok := ft.quickSpan(src, 0, len(b), true) - if ok { - return true - } - rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)} - rb.setFlusher(nil, cmpNormalBytes) - for bp < len(b) { - rb.out = b[bp:] - if bp = decomposeSegment(&rb, bp, true); bp < 0 { - return false - } - bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true) - } - return true -} - -func cmpNormalBytes(rb *reorderBuffer) bool { - b := rb.out - for i := 0; i < rb.nrune; i++ { - info := rb.rune[i] - if int(info.size) > len(b) { - return false - } - p := info.pos - pe := p + info.size - for ; p < pe; p++ { - if b[0] != rb.byte[p] { - return false - } - b = b[1:] - } - } - return true -} - -// IsNormalString returns true if s == f(s). 
func (f Form) IsNormalString(s string) bool {
	src := inputString(s)
	ft := formTable[f]
	// Fast path: quickSpan reports ok when it found nothing to normalize.
	bp, ok := ft.quickSpan(src, 0, len(s), true)
	if ok {
		return true
	}
	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
	// The flush function compares the runes held in rb against s, starting
	// at the captured position bp and advancing bp as bytes match. It
	// returns false on the first mismatch or when s is too short.
	rb.setFlusher(nil, func(rb *reorderBuffer) bool {
		for i := 0; i < rb.nrune; i++ {
			info := rb.rune[i]
			if bp+int(info.size) > len(s) {
				return false
			}
			p := info.pos
			pe := p + info.size
			for ; p < pe; p++ {
				if s[bp] != rb.byte[p] {
					return false
				}
				bp++
			}
		}
		return true
	})
	for bp < len(s) {
		// A negative result from decomposeSegment is treated as "not normal".
		if bp = decomposeSegment(&rb, bp, true); bp < 0 {
			return false
		}
		// Skip ahead over the next stretch that quickSpan accepts.
		bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
	}
	return true
}

// patchTail fixes a case where a rune may be incorrectly normalized
// if it is followed by illegal continuation bytes. It patches rb.out in
// place and reports whether the decomposition is still in progress.
func patchTail(rb *reorderBuffer) bool {
	info, p := lastRuneStart(&rb.f, rb.out)
	if p == -1 || info.size == 0 {
		// No rune start found, or the last rune could not be decoded:
		// nothing to patch.
		return true
	}
	end := p + int(info.size)
	// extra counts the bytes in rb.out that follow the last decoded rune.
	extra := len(rb.out) - end
	if extra > 0 {
		// Potentially allocating memory. However, this only
		// happens with ill-formed UTF-8.
		x := make([]byte, 0)
		x = append(x, rb.out[len(rb.out)-extra:]...)
		// Cut the extra bytes off, normalize what precedes them, then
		// put the saved bytes back unchanged.
		rb.out = rb.out[:end]
		decomposeToLastBoundary(rb)
		rb.doFlush()
		rb.out = append(rb.out, x...)
		return false
	}
	// The last rune ends exactly at the end of rb.out: remove it from the
	// output and re-insert it through the reorder buffer.
	buf := rb.out[p:]
	rb.out = rb.out[:p]
	decomposeToLastBoundary(rb)
	if s := rb.ss.next(info); s == ssStarter {
		rb.doFlush()
		rb.ss.first(info)
	} else if s == ssOverflow {
		rb.doFlush()
		rb.insertCGJ()
		rb.ss = 0
	}
	rb.insertUnsafe(inputBytes(buf), 0, info)
	return true
}

// appendQuick appends to rb.out the part of rb.src, starting at i, that
// quickSpan accepts, and returns the source position where that part ends.
// It returns i unchanged when i is already at the end of the source.
func appendQuick(rb *reorderBuffer, i int) int {
	if rb.nsrc == i {
		return i
	}
	end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
	rb.out = rb.src.appendSlice(rb.out, i, end)
	return end
}

// Append returns f(append(out, src...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte {
	return f.doAppend(out, inputBytes(src), len(src))
}

// doAppend is the shared implementation behind Append and AppendString.
// src holds the bytes to append and n is their length. It returns out
// unchanged when n is zero.
func (f Form) doAppend(out []byte, src input, n int) []byte {
	if n == 0 {
		return out
	}
	ft := formTable[f]
	// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
	if len(out) == 0 {
		p, _ := ft.quickSpan(src, 0, n, true)
		out = src.appendSlice(out, 0, p)
		if p == n {
			// quickSpan accepted the whole input; it has been copied as is.
			return out
		}
		rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
		return doAppendInner(&rb, p)
	}
	// out already holds data, so take the path that can merge the end of
	// out with the start of src.
	rb := reorderBuffer{f: *ft, src: src, nsrc: n}
	return doAppend(&rb, out, 0)
}

// doAppend appends the normalized form of rb.src, starting at position p,
// to out and returns the resulting buffer. When out is non-empty it may
// first re-process the tail of out together with the start of the source.
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
	rb.setFlusher(out, appendFlush)
	src, n := rb.src, rb.nsrc
	// Merging is only considered when there is existing output.
	doMerge := len(out) > 0
	if q := src.skipContinuationBytes(p); q > p {
		// Move leading non-starters to destination.
		rb.out = src.appendSlice(rb.out, p, q)
		p = q
		doMerge = patchTail(rb)
	}
	fd := &rb.f
	if doMerge {
		// info stays the zero Properties (size 0) when p is already at the
		// end of the source.
		var info Properties
		if p < n {
			info = fd.info(src, p)
			if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
				if p == 0 {
					decomposeToLastBoundary(rb)
				}
				p = decomposeSegment(rb, p, true)
			}
		}
		if info.size == 0 {
			rb.doFlush()
			// Append incomplete UTF-8 encoding.
			return src.appendSlice(rb.out, p, n)
		}
		if rb.nrune > 0 {
			// Runes are still pending in the reorder buffer, so skip the
			// appendQuick step below.
			return doAppendInner(rb, p)
		}
	}
	p = appendQuick(rb, p)
	return doAppendInner(rb, p)
}

// doAppendInner processes the rest of rb.src from position p, alternating
// between decomposeSegment and appendQuick until the end of the source is
// reached, and returns rb.out.
func doAppendInner(rb *reorderBuffer, p int) []byte {
	for n := rb.nsrc; p < n; {
		p = decomposeSegment(rb, p, true)
		p = appendQuick(rb, p)
	}
	return rb.out
}

// AppendString returns f(append(out, []byte(src)...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byte {
	return f.doAppend(out, inputString(src), len(src))
}

// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpan(b []byte) int {
	n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
	return n
}

// Span implements transform.SpanningTransformer. It returns a boundary n such
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
//
// When n < len(b), err is transform.ErrEndOfSpan if quickSpan reported a
// non-normalized part and transform.ErrShortSrc otherwise. err is nil when
// n == len(b).
func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
	n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
	if n < len(b) {
		if !ok {
			err = transform.ErrEndOfSpan
		} else {
			err = transform.ErrShortSrc
		}
	}
	return n, err
}

// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
//
// The error result follows the same rules as Span.
func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
	n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
	if n < len(s) {
		if !ok {
			err = transform.ErrEndOfSpan
		} else {
			err = transform.ErrShortSrc
		}
	}
	return n, err
}

// quickSpan returns a boundary n such that src[i:n] == f(src[i:n]), and ok,
// which is false when the scan stopped because a non-normalized part was
// found. If atEOF is false, n will not point past the last segment if this
// segment might become non-normalized by appending other runes.
func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
	var lastCC uint8
	ss := streamSafe(0)
	lastSegStart := i
	for n = end; i < n; {
		if j := src.skipASCII(i, n); i != j {
			// A run of ASCII was skipped. The segment start is set to the
			// last skipped byte, and the combining-class and stream-safe
			// state are reset.
			i = j
			lastSegStart = i - 1
			lastCC = 0
			ss = 0
			continue
		}
		info := f.info(src, i)
		if info.size == 0 {
			if atEOF {
				// include incomplete runes
				return n, true
			}
			return lastSegStart, true
		}
		// This block needs to be before the next, because it is possible to
		// have an overflow for runes that are starters (e.g. with U+FF9E).
		switch ss.next(info) {
		case ssStarter:
			lastSegStart = i
		case ssOverflow:
			return lastSegStart, false
		case ssSuccess:
			// A combining class lower than the previous one means the
			// runes are not in canonical order.
			if lastCC > info.ccc {
				return lastSegStart, false
			}
		}
		// The break statements below are outside the switch, so they leave
		// the for loop with i < n, which yields the "not ok" return at the
		// bottom of the function.
		if f.composing {
			if !info.isYesC() {
				break
			}
		} else {
			if !info.isYesD() {
				break
			}
		}
		lastCC = info.ccc
		i += int(info.size)
	}
	if i == n {
		// The whole range was accepted. Without atEOF, only report up to
		// the start of the last segment.
		if !atEOF {
			n = lastSegStart
		}
		return n, true
	}
	return lastSegStart, false
}

// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int {
	n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
	return n
}

// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int {
	return f.firstBoundary(inputBytes(b), len(b))
}

// firstBoundary is the shared implementation behind FirstBoundary and
// FirstBoundaryInString. nsrc is the length of src in bytes.
func (f Form) firstBoundary(src input, nsrc int) int {
	i := src.skipContinuationBytes(0)
	if i >= nsrc {
		return -1
	}
	fd := formTable[f]
	ss := streamSafe(0)
	// We should call ss.first here, but we can't as the first rune is
	// skipped already. This means FirstBoundary can't really determine
	// CGJ insertion points correctly. Luckily it doesn't have to.
	for {
		info := fd.info(src, i)
		if info.size == 0 {
			// No rune can be decoded at i: no boundary is reported.
			return -1
		}
		if s := ss.next(info); s != ssSuccess {
			return i
		}
		i += int(info.size)
		if i >= nsrc {
			// End of input: it counts as a boundary only if the last rune
			// has a boundary after it or ss.isMax() holds.
			if !info.BoundaryAfter() && !ss.isMax() {
				return -1
			}
			return nsrc
		}
	}
}

// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int {
	return f.firstBoundary(inputString(s), len(s))
}

// NextBoundary reports the index of the boundary between the first and next
// segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
-func (f Form) NextBoundary(b []byte, atEOF bool) int { - return f.nextBoundary(inputBytes(b), len(b), atEOF) -} - -// NextBoundaryInString reports the index of the boundary between the first and -// next segment in b or -1 if atEOF is false and there are not enough bytes to -// determine this boundary. -func (f Form) NextBoundaryInString(s string, atEOF bool) int { - return f.nextBoundary(inputString(s), len(s), atEOF) -} - -func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int { - if nsrc == 0 { - if atEOF { - return 0 - } - return -1 - } - fd := formTable[f] - info := fd.info(src, 0) - if info.size == 0 { - if atEOF { - return 1 - } - return -1 - } - ss := streamSafe(0) - ss.first(info) - - for i := int(info.size); i < nsrc; i += int(info.size) { - info = fd.info(src, i) - if info.size == 0 { - if atEOF { - return i - } - return -1 - } - // TODO: Using streamSafe to determine the boundary isn't the same as - // using BoundaryBefore. Determine which should be used. - if s := ss.next(info); s != ssSuccess { - return i - } - } - if !atEOF && !info.BoundaryAfter() && !ss.isMax() { - return -1 - } - return nsrc -} - -// LastBoundary returns the position i of the last boundary in b -// or -1 if b contains no boundary. 
func (f Form) LastBoundary(b []byte) int {
	return lastBoundary(formTable[f], b)
}

// lastBoundary scans b backwards from its end, using the form data in fd,
// and returns the position of the last boundary it finds, or -1 if it
// finds none.
func lastBoundary(fd *formInfo, b []byte) int {
	i := len(b)
	info, p := lastRuneStart(fd, b)
	if p == -1 {
		return -1
	}
	if info.size == 0 { // ends with incomplete rune
		if p == 0 { // starts with incomplete rune
			return -1
		}
		i = p
		info, p = lastRuneStart(fd, b[:i])
		if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
			return i
		}
	}
	if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
		return i
	}
	if info.BoundaryAfter() {
		return i
	}
	ss := streamSafe(0)
	v := ss.backwards(info)
	// Step backwards one rune at a time until ss.backwards reports
	// ssStarter or ssOverflow, or the bytes stop forming consecutive runes.
	for i = p; i >= 0 && v != ssStarter; i = p {
		info, p = lastRuneStart(fd, b[:i])
		if v = ss.backwards(info); v == ssOverflow {
			break
		}
		if p+int(info.size) != i {
			if p == -1 { // no boundary found
				return -1
			}
			return i // boundary after an illegal UTF-8 encoding
		}
	}
	return i
}

// decomposeSegment scans the first segment in src, starting at position sp,
// into rb. It inserts 0x034f (Grapheme Joiner) when it encounters a sequence
// of more than 30 non-starters. It returns the source position following the
// scanned segment, or int(iShortDst) or int(iShortSrc). It returns 0 if no
// rune can be decoded at sp.
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
	// Force one character to be consumed.
	info := rb.f.info(rb.src, sp)
	if info.size == 0 {
		return 0
	}
	if s := rb.ss.next(info); s == ssStarter {
		// TODO: this could be removed if we don't support merging.
		if rb.nrune > 0 {
			// Runes are already buffered: flush them without consuming
			// the rune at sp.
			goto end
		}
	} else if s == ssOverflow {
		rb.insertCGJ()
		goto end
	}
	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
		return int(err)
	}
	for {
		sp += int(info.size)
		if sp >= rb.nsrc {
			// Out of input. Unless atEOF is set, a rune without a boundary
			// after it means more input is needed.
			if !atEOF && !info.BoundaryAfter() {
				return int(iShortSrc)
			}
			break
		}
		info = rb.f.info(rb.src, sp)
		if info.size == 0 {
			if !atEOF {
				return int(iShortSrc)
			}
			break
		}
		if s := rb.ss.next(info); s == ssStarter {
			break
		} else if s == ssOverflow {
			rb.insertCGJ()
			break
		}
		if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
			return int(err)
		}
	}
end:
	if !rb.doFlush() {
		return int(iShortDst)
	}
	return sp
}

// lastRuneStart returns the runeInfo and position of the last
// rune in buf or the zero runeInfo and -1 if no rune was found.
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
	p := len(buf) - 1
	// Walk backwards to the nearest byte that utf8.RuneStart accepts.
	for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
	}
	if p < 0 {
		return Properties{}, -1
	}
	return fd.info(inputBytes(buf), p), p
}

// decomposeToLastBoundary finds an open segment at the end of rb.out
// and scans it into rb. It truncates rb.out to exclude that segment and
// stores the resulting stream-safe state in rb.ss. It returns nothing.
func decomposeToLastBoundary(rb *reorderBuffer) {
	fd := &rb.f
	info, i := lastRuneStart(fd, rb.out)
	if int(info.size) != len(rb.out)-i {
		// illegal trailing continuation bytes
		return
	}
	if info.BoundaryAfter() {
		return
	}
	var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
	padd := 0
	ss := streamSafe(0)
	p := len(rb.out)
	for {
		add[padd] = info
		v := ss.backwards(info)
		if v == ssOverflow {
			// Note that if we have an overflow, the string we are appending to
			// is not correctly normalized. In this case the behavior is undefined.
			break
		}
		padd++
		p -= int(info.size)
		if v == ssStarter || p < 0 {
			break
		}
		info, i = lastRuneStart(fd, rb.out[:p])
		if int(info.size) != p-i {
			// The preceding bytes do not form a rune ending exactly at p.
			break
		}
	}
	rb.ss = ss
	// Copy bytes for insertion as we may need to overwrite rb.out.
	var buf [maxBufferSize * utf8.UTFMax]byte
	cp := buf[:copy(buf[:], rb.out[p:])]
	rb.out = rb.out[:p]
	// add was filled back to front, so walk it in reverse to insert the
	// runes in their original order.
	for padd--; padd >= 0; padd-- {
		info = add[padd]
		rb.insertUnsafe(inputBytes(cp), 0, info)
		cp = cp[info.size:]
	}
}

// -- cgit v1.2.3