aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/unicode/norm/normalize.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/unicode/norm/normalize.go')
-rw-r--r--vendor/golang.org/x/text/unicode/norm/normalize.go609
1 files changed, 0 insertions, 609 deletions
diff --git a/vendor/golang.org/x/text/unicode/norm/normalize.go b/vendor/golang.org/x/text/unicode/norm/normalize.go
deleted file mode 100644
index 95efcf2..0000000
--- a/vendor/golang.org/x/text/unicode/norm/normalize.go
+++ /dev/null
@@ -1,609 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Note: the file data_test.go that is generated should not be checked in.
-//go:generate go run maketables.go triegen.go
-//go:generate go test -tags test
-
-// Package norm contains types and functions for normalizing Unicode strings.
-package norm // import "golang.org/x/text/unicode/norm"
-
-import (
- "unicode/utf8"
-
- "golang.org/x/text/transform"
-)
-
-// A Form denotes a canonical representation of Unicode code points.
-// The Unicode-defined normalization and equivalence forms are:
-//
-//	NFC   Unicode Normalization Form C
-//	NFD   Unicode Normalization Form D
-//	NFKC  Unicode Normalization Form KC
-//	NFKD  Unicode Normalization Form KD
-//
-// For a Form f, this documentation uses the notation f(x) to mean
-// the bytes or string x converted to the given form.
-// A position n in x is called a boundary if conversion to the form can
-// proceed independently on both sides:
-//	f(x) == append(f(x[0:n]), f(x[n:])...)
-//
-// References: https://unicode.org/reports/tr15/ and
-// https://unicode.org/notes/tn5/.
-type Form int // the methods of Form use the value as an index into formTable
-
-const (
- NFC Form = iota // Unicode Normalization Form C; also the zero value of Form
- NFD // Unicode Normalization Form D
- NFKC // Unicode Normalization Form KC
- NFKD // Unicode Normalization Form KD
-)
-
-// Bytes returns f(b). If b is already in form f, b itself is returned, so the result may alias the input.
-func (f Form) Bytes(b []byte) []byte {
- src := inputBytes(b)
- ft := formTable[f]
- n, ok := ft.quickSpan(src, 0, len(b), true) // n: end of the leading span already in form f; ok: nothing non-normalized was found
- if ok {
- return b // fast path: the input is returned unchanged
- }
- out := make([]byte, n, len(b)) // length n for the verified prefix; capacity len(b) is only an initial estimate
- copy(out, b[0:n])
- rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
- return doAppendInner(&rb, n) // normalize the rest of b, starting at offset n
-}
-
-// String returns f(s). If s is already in form f, s itself is returned.
-func (f Form) String(s string) string {
- src := inputString(s)
- ft := formTable[f]
- n, ok := ft.quickSpan(src, 0, len(s), true) // n: end of the leading span already in form f; ok: nothing non-normalized was found
- if ok {
- return s // fast path: the input is returned unchanged
- }
- out := make([]byte, n, len(s)) // length n for the verified prefix; capacity len(s) is only an initial estimate
- copy(out, s[0:n])
- rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
- return string(doAppendInner(&rb, n)) // normalize the rest of s from offset n, then convert the buffer to a string
-}
-
-// IsNormal reports whether b == f(b). It compares segment by segment and does not build f(b).
-func (f Form) IsNormal(b []byte) bool {
- src := inputBytes(b)
- ft := formTable[f]
- bp, ok := ft.quickSpan(src, 0, len(b), true) // bp: offset at which the quick check stopped
- if ok {
- return true
- }
- rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
- rb.setFlusher(nil, cmpNormalBytes) // the flush callback compares with rb.out instead of appending to it
- for bp < len(b) {
- rb.out = b[bp:] // the bytes cmpNormalBytes compares the buffered runes with
- if bp = decomposeSegment(&rb, bp, true); bp < 0 { // negative: an error code, presumably iShortDst after a failed comparison
- return false
- }
- bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true) // skip ahead over input that passes the quick check
- }
- return true
-}
-
-func cmpNormalBytes(rb *reorderBuffer) bool { // reports whether rb.out begins with the bytes of the runes buffered in rb
- b := rb.out
- for i := 0; i < rb.nrune; i++ {
- info := rb.rune[i]
- if int(info.size) > len(b) { // the buffered rune is longer than what is left to compare
- return false
- }
- p := info.pos // the rune's bytes are rb.byte[pos : pos+size]
- pe := p + info.size
- for ; p < pe; p++ {
- if b[0] != rb.byte[p] {
- return false
- }
- b = b[1:] // consume the byte that just matched
- }
- }
- return true
-}
-
-// IsNormalString reports whether s == f(s). It compares segment by segment and does not build f(s).
-func (f Form) IsNormalString(s string) bool {
- src := inputString(s)
- ft := formTable[f]
- bp, ok := ft.quickSpan(src, 0, len(s), true) // bp: offset at which the quick check stopped
- if ok {
- return true
- }
- rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
- rb.setFlusher(nil, func(rb *reorderBuffer) bool { // compares the buffered runes with s starting at the captured bp
- for i := 0; i < rb.nrune; i++ {
- info := rb.rune[i]
- if bp+int(info.size) > len(s) { // the buffered rune would run past the end of s
- return false
- }
- p := info.pos // the rune's bytes are rb.byte[pos : pos+size]
- pe := p + info.size
- for ; p < pe; p++ {
- if s[bp] != rb.byte[p] {
- return false
- }
- bp++ // advances the outer bp; it is overwritten by the decomposeSegment result below
- }
- }
- return true
- })
- for bp < len(s) {
- if bp = decomposeSegment(&rb, bp, true); bp < 0 { // negative: an error code, presumably iShortDst after a failed comparison
- return false
- }
- bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true) // skip ahead over input that passes the quick check
- }
- return true
-}
-
-// patchTail fixes a case where a rune may be incorrectly normalized
-// if it is followed by illegal continuation bytes. It patches rb.out in
-// place and reports whether the caller should still merge rb.out with src.
-func patchTail(rb *reorderBuffer) bool {
- info, p := lastRuneStart(&rb.f, rb.out)
- if p == -1 || info.size == 0 { // no rune start in rb.out, or the last one has size 0
- return true
- }
- end := p + int(info.size)
- extra := len(rb.out) - end // number of bytes that follow the last rune
- if extra > 0 {
- // Potentially allocating memory. However, this only
- // happens with ill-formed UTF-8.
- x := make([]byte, 0)
- x = append(x, rb.out[len(rb.out)-extra:]...) // save the trailing bytes before rb.out is truncated
- rb.out = rb.out[:end]
- decomposeToLastBoundary(rb)
- rb.doFlush()
- rb.out = append(rb.out, x...) // put the saved bytes back after the flushed segment
- return false
- }
- buf := rb.out[p:] // the last rune, which is re-inserted at the end of this function
- rb.out = rb.out[:p]
- decomposeToLastBoundary(rb)
- if s := rb.ss.next(info); s == ssStarter {
- rb.doFlush()
- rb.ss.first(info)
- } else if s == ssOverflow {
- rb.doFlush()
- rb.insertCGJ()
- rb.ss = 0
- }
- rb.insertUnsafe(inputBytes(buf), 0, info)
- return true
-}
-
-func appendQuick(rb *reorderBuffer, i int) int { // appends the quick span of src that starts at i to rb.out and returns its end
- if rb.nsrc == i { // already at the end of the source
- return i
- }
- end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true) // only the span end is needed here
- rb.out = rb.src.appendSlice(rb.out, i, end)
- return end
-}
-
-// Append returns f(append(out, src...)).
-// The buffer out must be nil, empty, or equal to f(out).
-func (f Form) Append(out []byte, src ...byte) []byte {
- return f.doAppend(out, inputBytes(src), len(src))
-}
-
-func (f Form) doAppend(out []byte, src input, n int) []byte { // shared body of Append and AppendString; n is the byte length of src
- if n == 0 {
- return out
- }
- ft := formTable[f]
- // Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
- if len(out) == 0 {
- p, _ := ft.quickSpan(src, 0, n, true)
- out = src.appendSlice(out, 0, p) // copy the span that passed the quick check as-is
- if p == n {
- return out
- }
- rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
- return doAppendInner(&rb, p) // normalize the rest of src from offset p
- }
- rb := reorderBuffer{f: *ft, src: src, nsrc: n}
- return doAppend(&rb, out, 0) // out is non-empty: use the package-level doAppend, which can merge with the tail of out
-}
-
-func doAppend(rb *reorderBuffer, out []byte, p int) []byte { // appends the normalized form of rb.src from offset p to out
- rb.setFlusher(out, appendFlush)
- src, n := rb.src, rb.nsrc
- doMerge := len(out) > 0 // with existing output, its tail may have to be merged with the start of src
- if q := src.skipContinuationBytes(p); q > p {
- // Move leading non-starters to destination.
- rb.out = src.appendSlice(rb.out, p, q)
- p = q
- doMerge = patchTail(rb) // patchTail decides whether a merge is still needed
- }
- fd := &rb.f
- if doMerge {
- var info Properties
- if p < n {
- info = fd.info(src, p)
- if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
- if p == 0 { // only at the very start of src is the tail of rb.out loaded into the buffer
- decomposeToLastBoundary(rb)
- }
- p = decomposeSegment(rb, p, true)
- }
- }
- if info.size == 0 { // also true when p >= n, because info is then still the zero Properties
- rb.doFlush()
- // Append incomplete UTF-8 encoding.
- return src.appendSlice(rb.out, p, n)
- }
- if rb.nrune > 0 { // runes are still buffered, so skip the appendQuick step below
- return doAppendInner(rb, p)
- }
- }
- p = appendQuick(rb, p)
- return doAppendInner(rb, p)
-}
-
-func doAppendInner(rb *reorderBuffer, p int) []byte { // processes rb.src from offset p to rb.nsrc and returns rb.out
- for n := rb.nsrc; p < n; {
- p = decomposeSegment(rb, p, true) // run one segment through the reorder buffer
- p = appendQuick(rb, p) // then copy whatever follows that passes the quick check
- }
- return rb.out
-}
-
-// AppendString returns f(append(out, []byte(src)...)).
-// The buffer out must be nil, empty, or equal to f(out).
-func (f Form) AppendString(out []byte, src string) []byte {
- return f.doAppend(out, inputString(src), len(src))
-}
-
-// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
-// It is not guaranteed to return the largest such n.
-func (f Form) QuickSpan(b []byte) int {
- n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true) // atEOF is true: b is treated as the complete input
- return n
-}
-
-// Span implements transform.SpanningTransformer. It returns a boundary n such
-// that b[0:n] == f(b[0:n]); n need not be the largest such value. err is nil unless n < len(b).
-func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
- n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
- if n < len(b) {
- if !ok {
- err = transform.ErrEndOfSpan // quickSpan stopped at input that failed its checks
- } else {
- err = transform.ErrShortSrc // no failure was seen, but the span stops short of len(b)
- }
- }
- return n, err
-}
-
-// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
-// It is not guaranteed to return the largest such n. err is nil unless n < len(s).
-func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
- n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
- if n < len(s) {
- if !ok {
- err = transform.ErrEndOfSpan // quickSpan stopped at input that failed its checks
- } else {
- err = transform.ErrShortSrc // no failure was seen, but the span stops short of len(s)
- }
- }
- return n, err
-}
-
-// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
-// whether the scan ended without finding a non-normalized part. If atEOF is
-// false, n will not point past the last segment if this segment might become
-// non-normalized by appending other runes.
-func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
- var lastCC uint8 // ccc value of the previous rune; reset to 0 after an ASCII run
- ss := streamSafe(0)
- lastSegStart := i
- for n = end; i < n; {
- if j := src.skipASCII(i, n); i != j { // an ASCII run was skipped
- i = j
- lastSegStart = i - 1 // the last skipped ASCII byte starts the current segment
- lastCC = 0
- ss = 0
- continue
- }
- info := f.info(src, i)
- if info.size == 0 {
- if atEOF {
- // include incomplete runes
- return n, true
- }
- return lastSegStart, true
- }
- // This block needs to be before the next, because it is possible to
- // have an overflow for runes that are starters (e.g. with U+FF9E).
- switch ss.next(info) {
- case ssStarter:
- lastSegStart = i
- case ssOverflow:
- return lastSegStart, false
- case ssSuccess:
- if lastCC > info.ccc { // ccc values are not in non-decreasing order
- return lastSegStart, false
- }
- }
- if f.composing {
- if !info.isYesC() {
- break // leaves the for loop; the switch above is already closed
- }
- } else {
- if !info.isYesD() {
- break // leaves the for loop
- }
- }
- lastCC = info.ccc
- i += int(info.size)
- }
- if i == n { // the whole range passed the checks
- if !atEOF {
- n = lastSegStart // more input may still change the last segment
- }
- return n, true
- }
- return lastSegStart, false
-}
-
-// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
-// It is not guaranteed to return the largest such n.
-func (f Form) QuickSpanString(s string) int {
- n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true) // atEOF is true: s is treated as the complete input
- return n
-}
-
-// FirstBoundary returns the position i of the first boundary in b
-// or -1 if b contains no boundary. The position may be len(b).
-func (f Form) FirstBoundary(b []byte) int {
- return f.firstBoundary(inputBytes(b), len(b))
-}
-
-func (f Form) firstBoundary(src input, nsrc int) int { // shared body of FirstBoundary and FirstBoundaryInString
- i := src.skipContinuationBytes(0)
- if i >= nsrc { // nothing is left after the leading continuation bytes
- return -1
- }
- fd := formTable[f]
- ss := streamSafe(0)
- // We should call ss.first here, but we can't as the first rune is
- // skipped already. This means FirstBoundary can't really determine
- // CGJ insertion points correctly. Luckily it doesn't have to.
- for {
- info := fd.info(src, i)
- if info.size == 0 {
- return -1
- }
- if s := ss.next(info); s != ssSuccess { // ssStarter or ssOverflow: a boundary lies before this rune
- return i
- }
- i += int(info.size)
- if i >= nsrc {
- if !info.BoundaryAfter() && !ss.isMax() { // the last rune does not close the segment
- return -1
- }
- return nsrc
- }
- }
-}
-
-// FirstBoundaryInString returns the position i of the first boundary in s
-// or -1 if s contains no boundary. The position may be len(s).
-func (f Form) FirstBoundaryInString(s string) int {
- return f.firstBoundary(inputString(s), len(s))
-}
-
-// NextBoundary reports the index of the boundary between the first and next
-// segment in b or -1 if atEOF is false and there are not enough bytes to
-// determine this boundary. The index may be len(b).
-func (f Form) NextBoundary(b []byte, atEOF bool) int {
- return f.nextBoundary(inputBytes(b), len(b), atEOF)
-}
-
-// NextBoundaryInString reports the index of the boundary between the first and
-// next segment in s or -1 if atEOF is false and there are not enough bytes to
-// determine this boundary. The index may be len(s).
-func (f Form) NextBoundaryInString(s string, atEOF bool) int {
- return f.nextBoundary(inputString(s), len(s), atEOF)
-}
-
-func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int { // shared body of NextBoundary and NextBoundaryInString
- if nsrc == 0 {
- if atEOF {
- return 0
- }
- return -1
- }
- fd := formTable[f]
- info := fd.info(src, 0)
- if info.size == 0 {
- if atEOF {
- return 1 // at EOF the first byte is treated as a segment of its own
- }
- return -1
- }
- ss := streamSafe(0)
- ss.first(info)
-
- for i := int(info.size); i < nsrc; i += int(info.size) {
- info = fd.info(src, i)
- if info.size == 0 {
- if atEOF {
- return i
- }
- return -1
- }
- // TODO: Using streamSafe to determine the boundary isn't the same as
- // using BoundaryBefore. Determine which should be used.
- if s := ss.next(info); s != ssSuccess { // ssStarter or ssOverflow: the first segment ends before this rune
- return i
- }
- }
- if !atEOF && !info.BoundaryAfter() && !ss.isMax() { // more input could still extend the segment
- return -1
- }
- return nsrc
-}
-
-// LastBoundary returns the position i of the last boundary in b
-// or -1 if b contains no boundary. The position may be len(b).
-func (f Form) LastBoundary(b []byte) int {
- return lastBoundary(formTable[f], b)
-}
-
-func lastBoundary(fd *formInfo, b []byte) int { // implements LastBoundary by scanning b backwards from its end
- i := len(b)
- info, p := lastRuneStart(fd, b)
- if p == -1 {
- return -1
- }
- if info.size == 0 { // ends with incomplete rune
- if p == 0 { // starts with incomplete rune
- return -1
- }
- i = p
- info, p = lastRuneStart(fd, b[:i]) // look at the rune before the incomplete one
- if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
- return i
- }
- }
- if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
- return i
- }
- if info.BoundaryAfter() {
- return i
- }
- ss := streamSafe(0)
- v := ss.backwards(info)
- for i = p; i >= 0 && v != ssStarter; i = p { // step back one rune per iteration until a starter is reported
- info, p = lastRuneStart(fd, b[:i])
- if v = ss.backwards(info); v == ssOverflow {
- break
- }
- if p+int(info.size) != i {
- if p == -1 { // no boundary found
- return -1
- }
- return i // boundary after an illegal UTF-8 encoding
- }
- }
- return i
-}
-
-// decomposeSegment scans the segment of src that starts at sp into rb. It inserts 0x034f
-// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
-// and returns the position in src after the scanned part, or iShortDst or iShortSrc.
-func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
- // Force one character to be consumed.
- info := rb.f.info(rb.src, sp)
- if info.size == 0 {
- return 0
- }
- if s := rb.ss.next(info); s == ssStarter {
- // TODO: this could be removed if we don't support merging.
- if rb.nrune > 0 { // flush what is already buffered; sp is returned unchanged
- goto end
- }
- } else if s == ssOverflow {
- rb.insertCGJ()
- goto end
- }
- if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
- return int(err)
- }
- for {
- sp += int(info.size) // advance past the rune that was just inserted
- if sp >= rb.nsrc {
- if !atEOF && !info.BoundaryAfter() { // more input could still extend this segment
- return int(iShortSrc)
- }
- break
- }
- info = rb.f.info(rb.src, sp)
- if info.size == 0 {
- if !atEOF {
- return int(iShortSrc)
- }
- break
- }
- if s := rb.ss.next(info); s == ssStarter { // the next segment starts here
- break
- } else if s == ssOverflow {
- rb.insertCGJ()
- break
- }
- if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
- return int(err)
- }
- }
-end:
- if !rb.doFlush() {
- return int(iShortDst)
- }
- return sp
-}
-
-// lastRuneStart returns the Properties and position of the last
-// rune start in buf, or the zero Properties and -1 if buf has none.
-func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
- p := len(buf) - 1
- for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- { // walk back over continuation bytes
- }
- if p < 0 {
- return Properties{}, -1
- }
- return fd.info(inputBytes(buf), p), p
-}
-
-// decomposeToLastBoundary finds an open segment at the end of rb.out
-// and scans it into rb. It truncates rb.out so that it excludes that segment.
-func decomposeToLastBoundary(rb *reorderBuffer) {
- fd := &rb.f
- info, i := lastRuneStart(fd, rb.out)
- if int(info.size) != len(rb.out)-i {
- // illegal trailing continuation bytes
- return
- }
- if info.BoundaryAfter() { // the last rune closes its segment: nothing is open
- return
- }
- var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
- padd := 0
- ss := streamSafe(0)
- p := len(rb.out)
- for {
- add[padd] = info
- v := ss.backwards(info)
- if v == ssOverflow {
- // Note that if we have an overflow, the string we are appending to
- // is not correctly normalized. In this case the behavior is undefined.
- break
- }
- padd++
- p -= int(info.size) // p now marks the start of the rune just recorded
- if v == ssStarter || p < 0 {
- break
- }
- info, i = lastRuneStart(fd, rb.out[:p])
- if int(info.size) != p-i { // the preceding bytes do not form one complete rune ending at p
- break
- }
- }
- rb.ss = ss
- // Copy bytes for insertion as we may need to overwrite rb.out.
- var buf [maxBufferSize * utf8.UTFMax]byte
- cp := buf[:copy(buf[:], rb.out[p:])]
- rb.out = rb.out[:p]
- for padd--; padd >= 0; padd-- { // add holds the runes last-to-first, so walk it backwards
- info = add[padd]
- rb.insertUnsafe(inputBytes(cp), 0, info)
- cp = cp[info.size:]
- }
-}