From 8c12c6939aab9106db14ec2d11d983bc5b29fb2c Mon Sep 17 00:00:00 2001 From: Niall Sheridan Date: Sun, 7 Jul 2019 21:33:44 +0100 Subject: Switch to modules --- vendor/golang.org/x/text/unicode/norm/iter.go | 457 -------------------------- 1 file changed, 457 deletions(-) delete mode 100644 vendor/golang.org/x/text/unicode/norm/iter.go (limited to 'vendor/golang.org/x/text/unicode/norm/iter.go') diff --git a/vendor/golang.org/x/text/unicode/norm/iter.go b/vendor/golang.org/x/text/unicode/norm/iter.go deleted file mode 100644 index ce17f96..0000000 --- a/vendor/golang.org/x/text/unicode/norm/iter.go +++ /dev/null @@ -1,457 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package norm - -import ( - "fmt" - "unicode/utf8" -) - -// MaxSegmentSize is the maximum size of a byte buffer needed to consider any -// sequence of starter and non-starter runes for the purpose of normalization. -const MaxSegmentSize = maxByteBufferSize - -// An Iter iterates over a string or byte slice, while normalizing it -// to a given Form. -type Iter struct { - rb reorderBuffer - buf [maxByteBufferSize]byte - info Properties // first character saved from previous iteration - next iterFunc // implementation of next depends on form - asciiF iterFunc - - p int // current position in input source - multiSeg []byte // remainder of multi-segment decomposition -} - -type iterFunc func(*Iter) []byte - -// Init initializes i to iterate over src after normalizing it to Form f. -func (i *Iter) Init(f Form, src []byte) { - i.p = 0 - if len(src) == 0 { - i.setDone() - i.rb.nsrc = 0 - return - } - i.multiSeg = nil - i.rb.init(f, src) - i.next = i.rb.f.nextMain - i.asciiF = nextASCIIBytes - i.info = i.rb.f.info(i.rb.src, i.p) - i.rb.ss.first(i.info) -} - -// InitString initializes i to iterate over src after normalizing it to Form f. -func (i *Iter) InitString(f Form, src string) { - i.p = 0 - if len(src) == 0 { - i.setDone() - i.rb.nsrc = 0 - return - } - i.multiSeg = nil - i.rb.initString(f, src) - i.next = i.rb.f.nextMain - i.asciiF = nextASCIIString - i.info = i.rb.f.info(i.rb.src, i.p) - i.rb.ss.first(i.info) -} - -// Seek sets the segment to be returned by the next call to Next to start -// at position p. It is the responsibility of the caller to set p to the -// start of a segment. -func (i *Iter) Seek(offset int64, whence int) (int64, error) { - var abs int64 - switch whence { - case 0: - abs = offset - case 1: - abs = int64(i.p) + offset - case 2: - abs = int64(i.rb.nsrc) + offset - default: - return 0, fmt.Errorf("norm: invalid whence") - } - if abs < 0 { - return 0, fmt.Errorf("norm: negative position") - } - if int(abs) >= i.rb.nsrc { - i.setDone() - return int64(i.p), nil - } - i.p = int(abs) - i.multiSeg = nil - i.next = i.rb.f.nextMain - i.info = i.rb.f.info(i.rb.src, i.p) - i.rb.ss.first(i.info) - return abs, nil -} - -// returnSlice returns a slice of the underlying input type as a byte slice. -// If the underlying is of type []byte, it will simply return a slice. -// If the underlying is of type string, it will copy the slice to the buffer -// and return that. -func (i *Iter) returnSlice(a, b int) []byte { - if i.rb.src.bytes == nil { - return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])] - } - return i.rb.src.bytes[a:b] -} - -// Pos returns the byte position at which the next call to Next will commence processing. -func (i *Iter) Pos() int { - return i.p -} - -func (i *Iter) setDone() { - i.next = nextDone - i.p = i.rb.nsrc -} - -// Done returns true if there is no more input to process. -func (i *Iter) Done() bool { - return i.p >= i.rb.nsrc -} - -// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input. -// For any input a and b for which f(a) == f(b), subsequent calls -// to Next will return the same segments. -// Modifying runes are grouped together with the preceding starter, if such a starter exists. -// Although not guaranteed, n will typically be the smallest possible n. -func (i *Iter) Next() []byte { - return i.next(i) -} - -func nextASCIIBytes(i *Iter) []byte { - p := i.p + 1 - if p >= i.rb.nsrc { - i.setDone() - return i.rb.src.bytes[i.p:p] - } - if i.rb.src.bytes[p] < utf8.RuneSelf { - p0 := i.p - i.p = p - return i.rb.src.bytes[p0:p] - } - i.info = i.rb.f.info(i.rb.src, i.p) - i.next = i.rb.f.nextMain - return i.next(i) -} - -func nextASCIIString(i *Iter) []byte { - p := i.p + 1 - if p >= i.rb.nsrc { - i.buf[0] = i.rb.src.str[i.p] - i.setDone() - return i.buf[:1] - } - if i.rb.src.str[p] < utf8.RuneSelf { - i.buf[0] = i.rb.src.str[i.p] - i.p = p - return i.buf[:1] - } - i.info = i.rb.f.info(i.rb.src, i.p) - i.next = i.rb.f.nextMain - return i.next(i) -} - -func nextHangul(i *Iter) []byte { - p := i.p - next := p + hangulUTF8Size - if next >= i.rb.nsrc { - i.setDone() - } else if i.rb.src.hangul(next) == 0 { - i.rb.ss.next(i.info) - i.info = i.rb.f.info(i.rb.src, i.p) - i.next = i.rb.f.nextMain - return i.next(i) - } - i.p = next - return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))] -} - -func nextDone(i *Iter) []byte { - return nil -} - -// nextMulti is used for iterating over multi-segment decompositions -// for decomposing normal forms. -func nextMulti(i *Iter) []byte { - j := 0 - d := i.multiSeg - // skip first rune - for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ { - } - for j < len(d) { - info := i.rb.f.info(input{bytes: d}, j) - if info.BoundaryBefore() { - i.multiSeg = d[j:] - return d[:j] - } - j += int(info.size) - } - // treat last segment as normal decomposition - i.next = i.rb.f.nextMain - return i.next(i) -} - -// nextMultiNorm is used for iterating over multi-segment decompositions -// for composing normal forms. -func nextMultiNorm(i *Iter) []byte { - j := 0 - d := i.multiSeg - for j < len(d) { - info := i.rb.f.info(input{bytes: d}, j) - if info.BoundaryBefore() { - i.rb.compose() - seg := i.buf[:i.rb.flushCopy(i.buf[:])] - i.rb.insertUnsafe(input{bytes: d}, j, info) - i.multiSeg = d[j+int(info.size):] - return seg - } - i.rb.insertUnsafe(input{bytes: d}, j, info) - j += int(info.size) - } - i.multiSeg = nil - i.next = nextComposed - return doNormComposed(i) -} - -// nextDecomposed is the implementation of Next for forms NFD and NFKD. -func nextDecomposed(i *Iter) (next []byte) { - outp := 0 - inCopyStart, outCopyStart := i.p, 0 - for { - if sz := int(i.info.size); sz <= 1 { - i.rb.ss = 0 - p := i.p - i.p++ // ASCII or illegal byte. Either way, advance by 1. - if i.p >= i.rb.nsrc { - i.setDone() - return i.returnSlice(p, i.p) - } else if i.rb.src._byte(i.p) < utf8.RuneSelf { - i.next = i.asciiF - return i.returnSlice(p, i.p) - } - outp++ - } else if d := i.info.Decomposition(); d != nil { - // Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero. - // Case 1: there is a leftover to copy. In this case the decomposition - // must begin with a modifier and should always be appended. - // Case 2: no leftover. Simply return d if followed by a ccc == 0 value. - p := outp + len(d) - if outp > 0 { - i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) - // TODO: this condition should not be possible, but we leave it - // in for defensive purposes. - if p > len(i.buf) { - return i.buf[:outp] - } - } else if i.info.multiSegment() { - // outp must be 0 as multi-segment decompositions always - // start a new segment. - if i.multiSeg == nil { - i.multiSeg = d - i.next = nextMulti - return nextMulti(i) - } - // We are in the last segment. Treat as normal decomposition. - d = i.multiSeg - i.multiSeg = nil - p = len(d) - } - prevCC := i.info.tccc - if i.p += sz; i.p >= i.rb.nsrc { - i.setDone() - i.info = Properties{} // Force BoundaryBefore to succeed. - } else { - i.info = i.rb.f.info(i.rb.src, i.p) - } - switch i.rb.ss.next(i.info) { - case ssOverflow: - i.next = nextCGJDecompose - fallthrough - case ssStarter: - if outp > 0 { - copy(i.buf[outp:], d) - return i.buf[:p] - } - return d - } - copy(i.buf[outp:], d) - outp = p - inCopyStart, outCopyStart = i.p, outp - if i.info.ccc < prevCC { - goto doNorm - } - continue - } else if r := i.rb.src.hangul(i.p); r != 0 { - outp = decomposeHangul(i.buf[:], r) - i.p += hangulUTF8Size - inCopyStart, outCopyStart = i.p, outp - if i.p >= i.rb.nsrc { - i.setDone() - break - } else if i.rb.src.hangul(i.p) != 0 { - i.next = nextHangul - return i.buf[:outp] - } - } else { - p := outp + sz - if p > len(i.buf) { - break - } - outp = p - i.p += sz - } - if i.p >= i.rb.nsrc { - i.setDone() - break - } - prevCC := i.info.tccc - i.info = i.rb.f.info(i.rb.src, i.p) - if v := i.rb.ss.next(i.info); v == ssStarter { - break - } else if v == ssOverflow { - i.next = nextCGJDecompose - break - } - if i.info.ccc < prevCC { - goto doNorm - } - } - if outCopyStart == 0 { - return i.returnSlice(inCopyStart, i.p) - } else if inCopyStart < i.p { - i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) - } - return i.buf[:outp] -doNorm: - // Insert what we have decomposed so far in the reorderBuffer. - // As we will only reorder, there will always be enough room. - i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) - i.rb.insertDecomposed(i.buf[0:outp]) - return doNormDecomposed(i) -} - -func doNormDecomposed(i *Iter) []byte { - for { - i.rb.insertUnsafe(i.rb.src, i.p, i.info) - if i.p += int(i.info.size); i.p >= i.rb.nsrc { - i.setDone() - break - } - i.info = i.rb.f.info(i.rb.src, i.p) - if i.info.ccc == 0 { - break - } - if s := i.rb.ss.next(i.info); s == ssOverflow { - i.next = nextCGJDecompose - break - } - } - // new segment or too many combining characters: exit normalization - return i.buf[:i.rb.flushCopy(i.buf[:])] -} - -func nextCGJDecompose(i *Iter) []byte { - i.rb.ss = 0 - i.rb.insertCGJ() - i.next = nextDecomposed - i.rb.ss.first(i.info) - buf := doNormDecomposed(i) - return buf -} - -// nextComposed is the implementation of Next for forms NFC and NFKC. -func nextComposed(i *Iter) []byte { - outp, startp := 0, i.p - var prevCC uint8 - for { - if !i.info.isYesC() { - goto doNorm - } - prevCC = i.info.tccc - sz := int(i.info.size) - if sz == 0 { - sz = 1 // illegal rune: copy byte-by-byte - } - p := outp + sz - if p > len(i.buf) { - break - } - outp = p - i.p += sz - if i.p >= i.rb.nsrc { - i.setDone() - break - } else if i.rb.src._byte(i.p) < utf8.RuneSelf { - i.rb.ss = 0 - i.next = i.asciiF - break - } - i.info = i.rb.f.info(i.rb.src, i.p) - if v := i.rb.ss.next(i.info); v == ssStarter { - break - } else if v == ssOverflow { - i.next = nextCGJCompose - break - } - if i.info.ccc < prevCC { - goto doNorm - } - } - return i.returnSlice(startp, i.p) -doNorm: - // reset to start position - i.p = startp - i.info = i.rb.f.info(i.rb.src, i.p) - i.rb.ss.first(i.info) - if i.info.multiSegment() { - d := i.info.Decomposition() - info := i.rb.f.info(input{bytes: d}, 0) - i.rb.insertUnsafe(input{bytes: d}, 0, info) - i.multiSeg = d[int(info.size):] - i.next = nextMultiNorm - return nextMultiNorm(i) - } - i.rb.ss.first(i.info) - i.rb.insertUnsafe(i.rb.src, i.p, i.info) - return doNormComposed(i) -} - -func doNormComposed(i *Iter) []byte { - // First rune should already be inserted. - for { - if i.p += int(i.info.size); i.p >= i.rb.nsrc { - i.setDone() - break - } - i.info = i.rb.f.info(i.rb.src, i.p) - if s := i.rb.ss.next(i.info); s == ssStarter { - break - } else if s == ssOverflow { - i.next = nextCGJCompose - break - } - i.rb.insertUnsafe(i.rb.src, i.p, i.info) - } - i.rb.compose() - seg := i.buf[:i.rb.flushCopy(i.buf[:])] - return seg -} - -func nextCGJCompose(i *Iter) []byte { - i.rb.ss = 0 // instead of first - i.rb.insertCGJ() - i.next = nextComposed - // Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter, - // even if they are not. This is particularly dubious for U+FF9E and UFF9A. - // If we ever change that, insert a check here. - i.rb.ss.first(i.info) - i.rb.insertUnsafe(i.rb.src, i.p, i.info) - return doNormComposed(i) -} -- cgit v1.2.3